1252 files changed, 23655 insertions, 20525 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2a4bc78c27ec..65731b060e3f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2227,7 +2227,7 @@ forcing Dual Address Cycle for PCI cards supporting greater than 32-bit addressing. - iommu.strict= [ARM64, X86] Configure TLB invalidation behaviour + iommu.strict= [ARM64, X86, S390] Configure TLB invalidation behaviour Format: { "0" | "1" } 0 - Lazy mode. Request that DMA unmap operations use deferred @@ -3596,6 +3596,13 @@ [NFS] set the TCP port on which the NFSv4 callback channel should listen. + nfs.delay_retrans= + [NFS] specifies the number of times the NFSv4 client + retries the request before returning an EAGAIN error, + after a reply of NFS4ERR_DELAY from the server. + Only applies if the softerr mount option is enabled, + and the specified value is >= 0. + nfs.enable_ino64= [NFS] enable 64-bit inode numbers. If zero, the NFS client will fake up a 32-bit inode @@ -5687,9 +5694,10 @@ s390_iommu= [HW,S390] Set s390 IOTLB flushing mode strict - With strict flushing every unmap operation will result in - an IOTLB flush. Default is lazy flushing before reuse, - which is faster. + With strict flushing every unmap operation will result + in an IOTLB flush. Default is lazy flushing before + reuse, which is faster. Deprecated, equivalent to + iommu.strict=1. s390_iommu_aperture= [KNL,S390] Specifies the size of the per device DMA address space diff --git a/Documentation/arch/arm64/elf_hwcaps.rst b/Documentation/arch/arm64/elf_hwcaps.rst index 4b8399ac592b..ced7b335e2e0 100644 --- a/Documentation/arch/arm64/elf_hwcaps.rst +++ b/Documentation/arch/arm64/elf_hwcaps.rst @@ -174,7 +174,7 @@ HWCAP2_DCPODP Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010. HWCAP2_SVE2 - Functionality implied by ID_AA64ZFR0_EL1.SVEVer == 0b0001. + Functionality implied by ID_AA64ZFR0_EL1.SVEver == 0b0001. HWCAP2_SVEAES Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0001. @@ -222,7 +222,7 @@ HWCAP2_RNG Functionality implied by ID_AA64ISAR0_EL1.RNDR == 0b0001. HWCAP2_BTI - Functionality implied by ID_AA64PFR0_EL1.BT == 0b0001. + Functionality implied by ID_AA64PFR1_EL1.BT == 0b0001. HWCAP2_MTE Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0010, as described @@ -232,7 +232,7 @@ HWCAP2_ECV Functionality implied by ID_AA64MMFR0_EL1.ECV == 0b0001. HWCAP2_AFP - Functionality implied by ID_AA64MFR1_EL1.AFP == 0b0001. + Functionality implied by ID_AA64MMFR1_EL1.AFP == 0b0001. HWCAP2_RPRES Functionality implied by ID_AA64ISAR2_EL1.RPRES == 0b0001. diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index a52996b22f75..7b2384de471f 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -77,6 +77,9 @@ The following keys are defined: * :c:macro:`RISCV_HWPROBE_EXT_ZBS`: The Zbs extension is supported, as defined in version 1.0 of the Bit-Manipulation ISA extensions. + * :c:macro:`RISCV_HWPROBE_EXT_ZICBOZ`: The Zicboz extension is supported, as + ratified in commit 3dd606f ("Create cmobase-v1.0.pdf") of riscv-CMOs. + * :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance information about the selected set of processors. @@ -96,3 +99,6 @@ The following keys are defined: * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are not supported at all and will generate a misaligned address fault. 
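As a usage illustration for the Zicboz hwprobe entries in this hunk (the block-size key is added just below), here is a minimal userspace sketch. It is not part of the patch; it assumes the uAPI header <asm/hwprobe.h> and the __NR_riscv_hwprobe number from this tree:

/*
 * Hedged sketch: query Zicboz support and the Zicboz block size via
 * the riscv_hwprobe(2) syscall. Passing cpusetsize == 0 and
 * cpus == NULL asks about all online harts.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/hwprobe.h>

int main(void)
{
	struct riscv_hwprobe pairs[2] = {
		{ .key = RISCV_HWPROBE_KEY_IMA_EXT_0 },
		{ .key = RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE },
	};

	if (syscall(__NR_riscv_hwprobe, pairs, 2, 0, NULL, 0))
		return 1;

	if (pairs[0].value & RISCV_HWPROBE_EXT_ZICBOZ)
		printf("Zicboz block size: %llu bytes\n",
		       (unsigned long long)pairs[1].value);

	return 0;
}

Keys the kernel does not recognize come back with the key field set to -1, so the check degrades cleanly on older kernels.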
+ +* :c:macro:`RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE`: An unsigned int which + represents the size of the Zicboz block in bytes. diff --git a/Documentation/arch/riscv/uabi.rst b/Documentation/arch/riscv/uabi.rst index 8960fac42c40..54d199dce78b 100644 --- a/Documentation/arch/riscv/uabi.rst +++ b/Documentation/arch/riscv/uabi.rst @@ -42,6 +42,26 @@ An example string following the order is:: rv64imadc_zifoo_zigoo_zafoo_sbar_scar_zxmbaz_xqux_xrux +"isa" and "hart isa" lines in /proc/cpuinfo +------------------------------------------- + +The "isa" line in /proc/cpuinfo describes the lowest common denominator of +RISC-V ISA extensions recognized by the kernel and implemented on all harts. The +"hart isa" line, in contrast, describes the set of extensions recognized by the +kernel on the particular hart being described, even if those extensions may not +be present on all harts in the system. + +In both lines, the presence of an extension guarantees only that the hardware +has the described capability. Additional kernel support or policy changes may be +required before an extension's capability is fully usable by userspace programs. +Similarly, for S-mode extensions, presence in one of these lines does not +guarantee that the kernel is taking advantage of the extension, or that the +feature will be visible in guest VMs managed by this kernel. + +Conversely, the absence of an extension in these lines does not necessarily mean +the hardware does not support that feature. The running kernel may not recognize +the extension, or may have deliberately removed it from the listing. + Misaligned accesses ------------------- diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index 0d2647fb358d..723408e399ab 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -37,16 +37,14 @@ prototype in a header for the wrapper kfunc. An example is given below:: /* Disables missing prototype warnings */ - __diag_push(); - __diag_ignore_all("-Wmissing-prototypes", - "Global kfuncs as their definitions will be in BTF"); + __bpf_kfunc_start_defs(); __bpf_kfunc struct task_struct *bpf_find_get_task_by_vpid(pid_t nr) { return find_get_task_by_vpid(nr); } - __diag_pop(); + __bpf_kfunc_end_defs(); A wrapper kfunc is often needed when we need to annotate parameters of the kfunc. Otherwise one may directly make the kfunc visible to the BPF program by diff --git a/Documentation/devicetree/bindings/display/renesas,shmobile-lcdc.yaml b/Documentation/devicetree/bindings/display/renesas,shmobile-lcdc.yaml new file mode 100644 index 000000000000..9816c4cacc7d --- /dev/null +++ b/Documentation/devicetree/bindings/display/renesas,shmobile-lcdc.yaml @@ -0,0 +1,130 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/renesas,shmobile-lcdc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Renesas SH-Mobile LCD Controller (LCDC) + +maintainers: + - Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com> + - Geert Uytterhoeven <geert+renesas@glider.be> + +properties: + compatible: + enum: + - renesas,r8a7740-lcdc # R-Mobile A1 + - renesas,sh73a0-lcdc # SH-Mobile AG5 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + minItems: 1 + maxItems: 5 + description: + Only the functional clock is mandatory. + Some of the optional clocks are model-dependent (e.g. "video" (a.k.a. + "vou" or "dv_clk") is available on R-Mobile A1 only).
+ + clock-names: + minItems: 1 + items: + - const: fck + - enum: [ media, lclk, hdmi, video ] + - enum: [ media, lclk, hdmi, video ] + - enum: [ media, lclk, hdmi, video ] + - enum: [ media, lclk, hdmi, video ] + + power-domains: + maxItems: 1 + + ports: + $ref: /schemas/graph.yaml#/properties/ports + + properties: + port@0: + $ref: /schemas/graph.yaml#/properties/port + description: LCD port (R-Mobile A1 and SH-Mobile AG5) + unevaluatedProperties: false + + port@1: + $ref: /schemas/graph.yaml#/properties/port + description: HDMI port (R-Mobile A1 LCDC1 and SH-Mobile AG5) + unevaluatedProperties: false + + port@2: + $ref: /schemas/graph.yaml#/properties/port + description: MIPI-DSI port (SH-Mobile AG5) + unevaluatedProperties: false + + required: + - port@0 + + unevaluatedProperties: false + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + - power-domains + - ports + +additionalProperties: false + +allOf: + - if: + properties: + compatible: + contains: + const: renesas,r8a7740-lcdc + then: + properties: + ports: + properties: + port@2: false + + - if: + properties: + compatible: + contains: + const: renesas,sh73a0-lcdc + then: + properties: + ports: + required: + - port@1 + - port@2 + +examples: + - | + #include <dt-bindings/clock/r8a7740-clock.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> + + lcd-controller@fe940000 { + compatible = "renesas,r8a7740-lcdc"; + reg = <0xfe940000 0x4000>; + interrupts = <GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&mstp1_clks R8A7740_CLK_LCDC0>, + <&cpg_clocks R8A7740_CLK_M3>, <&lcdlclk0_clk>, + <&vou_clk>; + clock-names = "fck", "media", "lclk", "video"; + power-domains = <&pd_a4lc>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + lcdc0_rgb: endpoint { + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/display/solomon,ssd132x.yaml b/Documentation/devicetree/bindings/display/solomon,ssd132x.yaml index 0aa41bd9ddca..37975ee61c5a 100644 --- a/Documentation/devicetree/bindings/display/solomon,ssd132x.yaml +++ b/Documentation/devicetree/bindings/display/solomon,ssd132x.yaml @@ -11,10 +11,10 @@ maintainers: properties: compatible: - - enum: - - solomon,ssd1322 - - solomon,ssd1325 - - solomon,ssd1327 + enum: + - solomon,ssd1322 + - solomon,ssd1325 + - solomon,ssd1327 required: - compatible diff --git a/Documentation/devicetree/bindings/input/fsl,scu-key.yaml b/Documentation/devicetree/bindings/input/fsl,scu-key.yaml index e5a3c355ee1f..29921aab9d97 100644 --- a/Documentation/devicetree/bindings/input/fsl,scu-key.yaml +++ b/Documentation/devicetree/bindings/input/fsl,scu-key.yaml @@ -24,6 +24,8 @@ properties: linux,keycodes: maxItems: 1 + wakeup-source: true + required: - compatible - linux,keycodes diff --git a/Documentation/devicetree/bindings/input/touchscreen/cypress,tt21000.yaml b/Documentation/devicetree/bindings/input/touchscreen/cypress,tt21000.yaml index 4080422a9eb5..037e5d3c447f 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/cypress,tt21000.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/cypress,tt21000.yaml @@ -34,6 +34,9 @@ properties: vdd-supply: description: Regulator for voltage. + vddio-supply: + description: Optional Regulator for I/O voltage. 
+ reset-gpios: maxItems: 1 diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index b1b2cf81b42f..aa9e1c0895a5 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -110,6 +110,7 @@ properties: - qcom,sdm630-smmu-v2 - qcom,sdm845-smmu-v2 - qcom,sm6350-smmu-v2 + - qcom,sm7150-smmu-v2 - const: qcom,adreno-smmu - const: qcom,smmu-v2 - description: Qcom Adreno GPUs on Google Cheza platform @@ -409,6 +410,7 @@ allOf: contains: enum: - qcom,sm6350-smmu-v2 + - qcom,sm7150-smmu-v2 - qcom,sm8150-smmu-500 - qcom,sm8250-smmu-500 then: diff --git a/Documentation/devicetree/bindings/pwm/mxs-pwm.yaml b/Documentation/devicetree/bindings/pwm/mxs-pwm.yaml index 6ffbed204c25..8f50e23ca8c9 100644 --- a/Documentation/devicetree/bindings/pwm/mxs-pwm.yaml +++ b/Documentation/devicetree/bindings/pwm/mxs-pwm.yaml @@ -15,12 +15,19 @@ allOf: properties: compatible: - enum: - - fsl,imx23-pwm + oneOf: + - const: fsl,imx23-pwm + - items: + - enum: + - fsl,imx28-pwm + - const: fsl,imx23-pwm reg: maxItems: 1 + clocks: + maxItems: 1 + "#pwm-cells": const: 3 @@ -31,6 +38,7 @@ properties: required: - compatible - reg + - clocks - fsl,pwm-number additionalProperties: false @@ -40,6 +48,7 @@ examples: - | pwm@80064000 { compatible = "fsl,imx23-pwm"; reg = <0x80064000 0x2000>; + clocks = <&clks 30>; #pwm-cells = <3>; fsl,pwm-number = <8>; }; diff --git a/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml b/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml index 895415772d1d..09102dda4942 100644 --- a/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml +++ b/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml @@ -21,6 +21,7 @@ properties: - mediatek,mt8188-scp - mediatek,mt8192-scp - mediatek,mt8195-scp + - mediatek,mt8195-scp-dual reg: description: @@ -31,10 +32,7 @@ properties: reg-names: minItems: 2 - items: - - const: sram - - const: cfg - - const: l1tcm + maxItems: 3 clocks: description: @@ -58,6 +56,93 @@ properties: memory-region: maxItems: 1 + cros-ec-rpmsg: + $ref: /schemas/mfd/google,cros-ec.yaml + description: + This subnode represents the rpmsg device. The properties + of this node are defined by the individual bindings for + the rpmsg devices. + + required: + - mediatek,rpmsg-name + + unevaluatedProperties: false + + '#address-cells': + const: 1 + + '#size-cells': + const: 1 + + ranges: + description: + Standard ranges definition providing address translations for + local SCP SRAM address spaces to bus addresses. + +patternProperties: + "^scp@[a-f0-9]+$": + type: object + description: + The MediaTek SCP integrated into the SoC might be a multi-core version. + The other cores are represented as child nodes of the boot core. + There are some integration differences for the IP, like the usage of an + address translator for translating SoC bus addresses into the address + space of the processor. + + Each SCP core has its own cache memory. The SRAM and L1TCM are shared by + the cores. The cache, SRAM and L1TCM power should be enabled before + booting the SCP cores. The sizes of the cache, SRAM, and L1TCM vary on + different SoCs. + + The SCP cores do not use an MMU, but have a set of registers to + control the translation of 32-bit CPU addresses into system bus + addresses. Cache and memory access settings are provided through a + Memory Protection Unit (MPU), programmable only from the SCP.
+ + properties: + compatible: + enum: + - mediatek,scp-core + + reg: + description: The base address and size of SRAM. + maxItems: 1 + + reg-names: + const: sram + + interrupts: + maxItems: 1 + + firmware-name: + $ref: /schemas/types.yaml#/definitions/string + description: + If present, name (or relative path) of the file within the + firmware search path containing the firmware image used when + initializing sub cores of multi-core SCP. + + memory-region: + maxItems: 1 + + cros-ec-rpmsg: + $ref: /schemas/mfd/google,cros-ec.yaml + description: + This subnode represents the rpmsg device. The properties + of this node are defined by the individual bindings for + the rpmsg devices. + + required: + - mediatek,rpmsg-name + + unevaluatedProperties: false + + required: + - compatible + - reg + - reg-names + + additionalProperties: false + required: - compatible - reg @@ -87,23 +172,39 @@ allOf: reg: maxItems: 2 reg-names: + items: + - const: sram + - const: cfg + - if: + properties: + compatible: + enum: + - mediatek,mt8192-scp + - mediatek,mt8195-scp + then: + properties: + reg: + maxItems: 3 + reg-names: + items: + - const: sram + - const: cfg + - const: l1tcm + - if: + properties: + compatible: + enum: + - mediatek,mt8195-scp-dual + then: + properties: + reg: maxItems: 2 + reg-names: + items: + - const: cfg + - const: l1tcm -additionalProperties: - type: object - description: - Subnodes of the SCP represent rpmsg devices. The names of the devices - are not important. The properties of these nodes are defined by the - individual bindings for the rpmsg devices. - properties: - mediatek,rpmsg-name: - $ref: /schemas/types.yaml#/definitions/string-array - description: - Contains the name for the rpmsg device. Used to match - the subnode to rpmsg device announced by SCP. 
- - required: - - mediatek,rpmsg-name +additionalProperties: false examples: - | @@ -118,7 +219,42 @@ examples: clocks = <&infracfg CLK_INFRA_SCPSYS>; clock-names = "main"; - cros_ec { + cros-ec-rpmsg { + compatible = "google,cros-ec-rpmsg"; mediatek,rpmsg-name = "cros-ec-rpmsg"; }; }; + + - | + scp@10500000 { + compatible = "mediatek,mt8195-scp-dual"; + reg = <0x10720000 0xe0000>, + <0x10700000 0x8000>; + reg-names = "cfg", "l1tcm"; + + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0x10500000 0x100000>; + + scp@0 { + compatible = "mediatek,scp-core"; + reg = <0x0 0xa0000>; + reg-names = "sram"; + + cros-ec-rpmsg { + compatible = "google,cros-ec-rpmsg"; + mediatek,rpmsg-name = "cros-ec-rpmsg"; + }; + }; + + scp@a0000 { + compatible = "mediatek,scp-core"; + reg = <0xa0000 0x20000>; + reg-names = "sram"; + + cros-ec-rpmsg { + compatible = "google,cros-ec-rpmsg"; + mediatek,rpmsg-name = "cros-ec-rpmsg"; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml index a2b0079de039..661c2b425da3 100644 --- a/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml +++ b/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml @@ -66,7 +66,9 @@ allOf: - qcom,msm8953-adsp-pil - qcom,msm8974-adsp-pil - qcom,msm8996-adsp-pil + - qcom,msm8996-slpi-pil - qcom,msm8998-adsp-pas + - qcom,msm8998-slpi-pas - qcom,sdm845-adsp-pas - qcom,sdm845-cdsp-pas - qcom,sdm845-slpi-pas @@ -84,24 +86,6 @@ allOf: compatible: contains: enum: - - qcom,msm8996-slpi-pil - - qcom,msm8998-slpi-pas - then: - properties: - clocks: - items: - - description: XO clock - - description: AGGRE2 clock - clock-names: - items: - - const: xo - - const: aggre2 - - - if: - properties: - compatible: - contains: - enum: - qcom,msm8226-adsp-pil - qcom,msm8953-adsp-pil - qcom,msm8974-adsp-pil diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,msm8996-mss-pil.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,msm8996-mss-pil.yaml index 0643faae2c39..971734085d51 100644 --- a/Documentation/devicetree/bindings/remoteproc/qcom,msm8996-mss-pil.yaml +++ b/Documentation/devicetree/bindings/remoteproc/qcom,msm8996-mss-pil.yaml @@ -220,7 +220,6 @@ allOf: - description: GCC MSS GPLL0 clock - description: GCC MSS SNOC_AXI clock - description: GCC MSS MNOC_AXI clock - - description: RPM PNOC clock - description: RPM QDSS clock clock-names: items: @@ -231,7 +230,6 @@ allOf: - const: gpll0_mss - const: snoc_axi - const: mnoc_axi - - const: pnoc - const: qdss glink-edge: false required: diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,sc7180-pas.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,sc7180-pas.yaml index 689d5d535331..f10f329677d8 100644 --- a/Documentation/devicetree/bindings/remoteproc/qcom,sc7180-pas.yaml +++ b/Documentation/devicetree/bindings/remoteproc/qcom,sc7180-pas.yaml @@ -16,6 +16,7 @@ description: properties: compatible: enum: + - qcom,sc7180-adsp-pas - qcom,sc7180-mpss-pas - qcom,sc7280-mpss-pas @@ -30,26 +31,6 @@ properties: items: - const: xo - interrupts: - minItems: 6 - - interrupt-names: - minItems: 6 - - power-domains: - minItems: 2 - items: - - description: CX power domain - - description: MX power domain - - description: MSS power domain - - power-domain-names: - minItems: 2 - items: - - const: cx - - const: mx - - const: mss - memory-region: maxItems: 1 description: Reference to the reserved-memory for the Hexagon core @@ -75,19 +56,69 @@ allOf: properties: compatible: enum: + - 
qcom,sc7180-adsp-pas + then: + properties: + interrupts: + maxItems: 5 + interrupt-names: + maxItems: 5 + else: + properties: + interrupts: + minItems: 6 + interrupt-names: + minItems: 6 + + - if: + properties: + compatible: + enum: + - qcom,sc7180-adsp-pas + then: + properties: + power-domains: + items: + - description: LCX power domain + - description: LMX power domain + power-domain-names: + items: + - const: lcx + - const: lmx + + - if: + properties: + compatible: + enum: - qcom,sc7180-mpss-pas then: properties: power-domains: - minItems: 3 + items: + - description: CX power domain + - description: MX power domain + - description: MSS power domain power-domain-names: - minItems: 3 - else: + items: + - const: cx + - const: mx + - const: mss + + - if: + properties: + compatible: + enum: + - qcom,sc7280-mpss-pas + then: properties: power-domains: - maxItems: 2 + items: + - description: CX power domain + - description: MX power domain power-domain-names: - maxItems: 2 + items: + - const: cx + - const: mx unevaluatedProperties: false diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,sm6375-pas.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,sm6375-pas.yaml new file mode 100644 index 000000000000..3e4a03eb4532 --- /dev/null +++ b/Documentation/devicetree/bindings/remoteproc/qcom,sm6375-pas.yaml @@ -0,0 +1,145 @@ +# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/remoteproc/qcom,sm6375-pas.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm SM6375 Peripheral Authentication Service + +maintainers: + - Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org> + +description: + Qualcomm SM6375 SoC Peripheral Authentication Service loads and boots + firmware on the Qualcomm DSP Hexagon cores. 
+ +properties: + compatible: + enum: + - qcom,sm6375-adsp-pas + - qcom,sm6375-cdsp-pas + - qcom,sm6375-mpss-pas + + reg: + maxItems: 1 + + clocks: + items: + - description: XO clock + + clock-names: + items: + - const: xo + + memory-region: + maxItems: 1 + description: Reference to the reserved-memory for the Hexagon core + + firmware-name: + $ref: /schemas/types.yaml#/definitions/string + description: Firmware name for the Hexagon core + + smd-edge: false + +required: + - compatible + - reg + +allOf: + - $ref: /schemas/remoteproc/qcom,pas-common.yaml# + - if: + properties: + compatible: + enum: + - qcom,sm6375-adsp-pas + - qcom,sm6375-cdsp-pas + then: + properties: + interrupts: + maxItems: 5 + interrupt-names: + maxItems: 5 + else: + properties: + interrupts: + minItems: 6 + interrupt-names: + minItems: 6 + + - if: + properties: + compatible: + enum: + - qcom,sm6375-adsp-pas + then: + properties: + power-domains: + items: + - description: LCX power domain + - description: LMX power domain + power-domain-names: + items: + - const: lcx + - const: lmx + + - if: + properties: + compatible: + enum: + - qcom,sm6375-cdsp-pas + - qcom,sm6375-mpss-pas + then: + properties: + power-domains: + items: + - description: CX power domain + power-domain-names: + items: + - const: cx + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/clock/qcom,rpmcc.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> + #include <dt-bindings/mailbox/qcom-ipcc.h> + #include <dt-bindings/power/qcom-rpmpd.h> + + remoteproc_adsp: remoteproc@a400000 { + compatible = "qcom,sm6375-adsp-pas"; + reg = <0x0a400000 0x100>; + + interrupts-extended = <&intc GIC_SPI 282 IRQ_TYPE_LEVEL_HIGH>, + <&smp2p_adsp_in 0 IRQ_TYPE_EDGE_RISING>, + <&smp2p_adsp_in 1 IRQ_TYPE_EDGE_RISING>, + <&smp2p_adsp_in 2 IRQ_TYPE_EDGE_RISING>, + <&smp2p_adsp_in 3 IRQ_TYPE_EDGE_RISING>; + interrupt-names = "wdog", "fatal", "ready", + "handover", "stop-ack"; + + clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>; + clock-names = "xo"; + + power-domains = <&rpmpd SM6375_VDD_LPI_CX>, + <&rpmpd SM6375_VDD_LPI_MX>; + power-domain-names = "lcx", "lmx"; + + memory-region = <&pil_adsp_mem>; + + qcom,smem-states = <&smp2p_adsp_out 0>; + qcom,smem-state-names = "stop"; + + glink-edge { + interrupts-extended = <&ipcc IPCC_CLIENT_LPASS + IPCC_MPROC_SIGNAL_GLINK_QMP + IRQ_TYPE_EDGE_RISING>; + mboxes = <&ipcc IPCC_CLIENT_LPASS + IPCC_MPROC_SIGNAL_GLINK_QMP>; + + label = "lpass"; + qcom,remote-pid = <2>; + + /* ... */ + }; + }; diff --git a/Documentation/devicetree/bindings/remoteproc/ti,pru-rproc.yaml b/Documentation/devicetree/bindings/remoteproc/ti,pru-rproc.yaml index baccd98754a9..faf16cf14085 100644 --- a/Documentation/devicetree/bindings/remoteproc/ti,pru-rproc.yaml +++ b/Documentation/devicetree/bindings/remoteproc/ti,pru-rproc.yaml @@ -66,6 +66,17 @@ properties: Should contain the name of the default firmware image file located on the firmware search path. + interrupts: + maxItems: 1 + description: + Interrupt specifiers enable the virtio/rpmsg communication between MPU + and the PRU/RTU cores. For the values of the interrupt cells please refer + to interrupt-controller/ti,pruss-intc.yaml schema. 
+ + interrupt-names: + items: + - const: vring + if: properties: compatible: @@ -171,6 +182,9 @@ examples: <0x22400 0x100>; reg-names = "iram", "control", "debug"; firmware-name = "am65x-pru0_0-fw"; + interrupt-parent = <&icssg0_intc>; + interrupts = <16 2 2>; + interrupt-names = "vring"; }; rtu0_0: rtu@4000 { @@ -180,6 +194,9 @@ examples: <0x23400 0x100>; reg-names = "iram", "control", "debug"; firmware-name = "am65x-rtu0_0-fw"; + interrupt-parent = <&icssg0_intc>; + interrupts = <20 4 4>; + interrupt-names = "vring"; }; tx_pru0_0: txpru@a000 { @@ -198,6 +215,9 @@ examples: <0x24400 0x100>; reg-names = "iram", "control", "debug"; firmware-name = "am65x-pru0_1-fw"; + interrupt-parent = <&icssg0_intc>; + interrupts = <18 3 3>; + interrupt-names = "vring"; }; rtu0_1: rtu@6000 { @@ -207,6 +227,9 @@ examples: <0x23c00 0x100>; reg-names = "iram", "control", "debug"; firmware-name = "am65x-rtu0_1-fw"; + interrupt-parent = <&icssg0_intc>; + interrupts = <22 5 5>; + interrupt-names = "vring"; }; tx_pru0_1: txpru@c000 { diff --git a/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml b/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml index 443e2e7ab467..69845ec32e81 100644 --- a/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml @@ -15,9 +15,15 @@ allOf: properties: compatible: - enum: - - amlogic,meson-gxbb-wdt - - amlogic,t7-wdt + oneOf: + - enum: + - amlogic,meson-gxbb-wdt + - amlogic,t7-wdt + - items: + - enum: + - amlogic,c3-wdt + - amlogic,s4-wdt + - const: amlogic,t7-wdt reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt b/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt index a8197632d6d2..3208adb3e52e 100644 --- a/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt +++ b/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt @@ -47,7 +47,15 @@ Optional properties for AST2500-compatible watchdogs: is configured as push-pull, then set the pulse polarity to active-high. The default is active-low. -Example: +Optional properties for AST2500- and AST2600-compatible watchdogs: + - aspeed,reset-mask: A bitmask indicating which peripherals will be reset if + the watchdog timer expires. On AST2500 this should be a + single word defined using the AST2500_WDT_RESET_* macros; + on AST2600 this should be a two-word array with the first + word defined using the AST2600_WDT_RESET1_* macros and the + second word defined using the AST2600_WDT_RESET2_* macros. + +Examples: wdt1: watchdog@1e785000 { compatible = "aspeed,ast2400-wdt"; @@ -55,3 +63,11 @@ Example: aspeed,reset-type = "system"; aspeed,external-signal; }; + + #include <dt-bindings/watchdog/aspeed-wdt.h> + wdt2: watchdog@1e785040 { + compatible = "aspeed,ast2600-wdt"; + reg = <0x1e785040 0x40>; + aspeed,reset-mask = <AST2600_WDT_RESET1_DEFAULT + (AST2600_WDT_RESET2_DEFAULT & ~AST2600_WDT_RESET2_LPC)>; + }; diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml b/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml index 4b7ed1355701..9c50766bf690 100644 --- a/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml @@ -30,6 +30,11 @@ properties: clocks: maxItems: 1 + fsl,ext-reset-output: + description: + When set, wdog can generate external reset from the wdog_any pin. 
+ type: boolean + required: - compatible - interrupts diff --git a/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml b/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml index 5046dfa55f13..c12bc852aedc 100644 --- a/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml @@ -21,6 +21,8 @@ properties: - qcom,apss-wdt-ipq5018 - qcom,apss-wdt-ipq5332 - qcom,apss-wdt-ipq9574 + - qcom,apss-wdt-msm8226 + - qcom,apss-wdt-msm8974 - qcom,apss-wdt-msm8994 - qcom,apss-wdt-qcm2290 - qcom,apss-wdt-qcs404 diff --git a/Documentation/filesystems/nfs/exporting.rst b/Documentation/filesystems/nfs/exporting.rst index 198d805d611c..f04ce1215a03 100644 --- a/Documentation/filesystems/nfs/exporting.rst +++ b/Documentation/filesystems/nfs/exporting.rst @@ -122,12 +122,9 @@ are exportable by setting the s_export_op field in the struct super_block. This field must point to a "struct export_operations" struct which has the following members: - encode_fh (optional) + encode_fh (mandatory) Takes a dentry and creates a filehandle fragment which may later be used - to find or create a dentry for the same object. The default - implementation creates a filehandle fragment that encodes a 32bit inode - and generation number for the inode encoded, and if necessary the - same information for the parent. + to find or create a dentry for the same object. fh_to_dentry (mandatory) Given a filehandle fragment, this should find the implied object and diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst index 5b93268e400f..0407f361f32a 100644 --- a/Documentation/filesystems/overlayfs.rst +++ b/Documentation/filesystems/overlayfs.rst @@ -344,10 +344,11 @@ escaping the colons with a single backslash. For example: mount -t overlay overlay -olowerdir=/a\:lower\:\:dir /merged -Since kernel version v6.5, directory names containing colons can also -be provided as lower layer using the fsconfig syscall from new mount api: +Since kernel version v6.8, directory names containing colons can also +be configured as a lower layer using the "lowerdir+" mount options and the +fsconfig syscall from new mount api. For example: - fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir", "/a:lower::dir", 0); + fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/a:lower::dir", 0); In the latter case, colons in lower layer directory names will be escaped as octal characters (\072) when displayed in /proc/self/mountinfo. @@ -416,6 +417,16 @@ Only the data of the files in the "data-only" lower layers may be visible when a "metacopy" file in one of the lower layers above it has a "redirect" to the absolute path of the "lower data" file in the "data-only" lower layer. +Since kernel version v6.8, "data-only" lower layers can also be added using +the "datadir+" mount options and the fsconfig syscall from new mount api. +For example: + + fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/l1", 0); + fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/l2", 0); + fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/l3", 0); + fsconfig(fs_fd, FSCONFIG_SET_STRING, "datadir+", "/do1", 0); + fsconfig(fs_fd, FSCONFIG_SET_STRING, "datadir+", "/do2", 0); + fs-verity support ---------------------- @@ -504,6 +515,29 @@ directory tree on the same or different underlying filesystem, and even to a different machine. With the "inodes index" feature, trying to mount the copied layers will fail the verification of the lower root file handle.
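To show where the fsconfig() fragments above fit, here is a minimal sketch of the complete new-mount-API sequence. It is not part of the patch; it assumes a v6.8+ kernel, uses syscall(2) directly because libc wrappers for fsopen()/fsmount()/move_mount() may be absent, and omits error handling:

/*
 * Hedged sketch: build a read-only overlay with two lower layers and
 * one data-only layer. Data-only layers need metacopy and redirects
 * enabled; all paths are placeholders.
 */
#include <fcntl.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/mount.h>

int main(void)
{
	int fs_fd, mnt_fd;

	fs_fd = syscall(SYS_fsopen, "overlay", 0);

	syscall(SYS_fsconfig, fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/l1", 0);
	syscall(SYS_fsconfig, fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/l2", 0);
	syscall(SYS_fsconfig, fs_fd, FSCONFIG_SET_STRING, "datadir+", "/do1", 0);
	syscall(SYS_fsconfig, fs_fd, FSCONFIG_SET_STRING, "metacopy", "on", 0);
	syscall(SYS_fsconfig, fs_fd, FSCONFIG_SET_STRING, "redirect_dir", "on", 0);
	syscall(SYS_fsconfig, fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);

	mnt_fd = syscall(SYS_fsmount, fs_fd, 0, 0);
	syscall(SYS_move_mount, mnt_fd, "", AT_FDCWD, "/merged",
		MOVE_MOUNT_F_EMPTY_PATH);

	return 0;
}

As with the colon-separated lowerdir syntax, "lowerdir+" layers stack in the order given, topmost first.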
+Nesting overlayfs mounts +------------------------ + +It is possible to use a lower directory that is stored on an overlayfs +mount. For regular files this does not need any special care. However, files +that have overlayfs attributes, such as whiteouts or "overlay.*" xattrs, will be +interpreted by the underlying overlayfs mount and stripped out. In order to +allow the second overlayfs mount to see the attributes, they must be escaped. + +Overlayfs specific xattrs are escaped by using a special prefix of +"overlay.overlay.". So, a file with a "trusted.overlay.overlay.metacopy" xattr +in the lower dir will be exposed as a regular file with a +"trusted.overlay.metacopy" xattr in the overlayfs mount. This can be nested by +repeating the prefix multiple times, as each instance only removes one prefix. + +A lower dir with a regular whiteout will always be handled by the overlayfs +mount, so to support storing an effective whiteout file in an overlayfs mount, an +alternative form of whiteout is supported. This form is a regular, zero-size +file with the "overlay.whiteout" xattr set, inside a directory with the +"overlay.whiteouts" xattr set. Such whiteouts are never created by overlayfs, +but can be used by userspace tools (like containers) that generate lower layers. +These alternative whiteouts can be escaped using the standard xattr escape +mechanism in order to properly nest to any depth. Non-standard behavior --------------------- diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index d69f59700a23..878e72b2f8b7 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1052,3 +1052,12 @@ kill_anon_super(), or kill_block_super() helpers. Lock ordering has been changed so that s_umount ranks above open_mutex again. All places where s_umount was taken under open_mutex have been fixed up. + +--- + +**mandatory** + +export_operations ->encode_fh() no longer has a default implementation to +encode FILEID_INO32_GEN* file handles. +Filesystems that used the default implementation may use the generic helper +generic_encode_ino32_fh() explicitly. diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index c6ba4889575a..572d83a414d0 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -71,6 +71,10 @@ definitions: name: roce-bit - name: migratable-bit + - + name: ipsec-crypto-bit + - + name: ipsec-packet-bit - type: enum name: sb-threshold-type diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst index 6d8acdbe9be1..769149d98773 100644 --- a/Documentation/networking/smc-sysctl.rst +++ b/Documentation/networking/smc-sysctl.rst @@ -44,18 +44,16 @@ smcr_testlink_time - INTEGER wmem - INTEGER Initial size of send buffer used by SMC sockets. - The default value inherits from net.ipv4.tcp_wmem[1]. The minimum value is 16KiB and there is no hard limit for max value, but only allowed 512KiB for SMC-R and 1MiB for SMC-D. - Default: 16K + Default: 64KiB rmem - INTEGER Initial size of receive buffer (RMB) used by SMC sockets. - The default value inherits from net.ipv4.tcp_rmem[1]. The minimum value is 16KiB and there is no hard limit for max value, but only allowed 512KiB for SMC-R and 1MiB for SMC-D.
- Default: 128K + Default: 64KiB diff --git a/Documentation/trace/fprobetrace.rst b/Documentation/trace/fprobetrace.rst index 8e9bebcf0a2e..e35e6b18df40 100644 --- a/Documentation/trace/fprobetrace.rst +++ b/Documentation/trace/fprobetrace.rst @@ -59,8 +59,12 @@ Synopsis of fprobe-events and bitfield are supported. (\*1) This is available only when BTF is enabled. - (\*2) only for the probe on function entry (offs == 0). - (\*3) only for return probe. + (\*2) only for the probe on function entry (offs == 0). Note, this argument access + is best effort, because depending on the argument type, it may be passed on + the stack. But this only support the arguments via registers. + (\*3) only for return probe. Note that this is also best effort. Depending on the + return value type, it might be passed via a pair of registers. But this only + accesses one register. (\*4) this is useful for fetching a field of data structures. (\*5) "u" means user-space dereference. diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst index 8a2dfee38145..bf9cecb69fc9 100644 --- a/Documentation/trace/kprobetrace.rst +++ b/Documentation/trace/kprobetrace.rst @@ -61,8 +61,12 @@ Synopsis of kprobe_events (x8/x16/x32/x64), "char", "string", "ustring", "symbol", "symstr" and bitfield are supported. - (\*1) only for the probe on function entry (offs == 0). - (\*2) only for return probe. + (\*1) only for the probe on function entry (offs == 0). Note, this argument access + is best effort, because depending on the argument type, it may be passed on + the stack. But this only support the arguments via registers. + (\*2) only for return probe. Note that this is also best effort. Depending on the + return value type, it might be passed via a pair of registers. But this only + accesses one register. (\*3) this is useful for fetching a field of data structures. (\*4) "u" means user-space dereference. See :ref:`user_mem_access`. diff --git a/Documentation/userspace-api/media/v4l/subdev-formats.rst b/Documentation/userspace-api/media/v4l/subdev-formats.rst index a3a35eeed708..eb3cd20b0cf2 100644 --- a/Documentation/userspace-api/media/v4l/subdev-formats.rst +++ b/Documentation/userspace-api/media/v4l/subdev-formats.rst @@ -949,6 +949,78 @@ The following tables list existing packed RGB formats. - b\ :sub:`2` - b\ :sub:`1` - b\ :sub:`0` + * .. _MEDIA-BUS-FMT-RGB666-2X9-BE: + + - MEDIA_BUS_FMT_RGB666_2X9_BE + - 0x1025 + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - r\ :sub:`5` + - r\ :sub:`4` + - r\ :sub:`3` + - r\ :sub:`2` + - r\ :sub:`1` + - r\ :sub:`0` + - g\ :sub:`5` + - g\ :sub:`4` + - g\ :sub:`3` + * - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - g\ :sub:`2` + - g\ :sub:`1` + - g\ :sub:`0` + - b\ :sub:`5` + - b\ :sub:`4` + - b\ :sub:`3` + - b\ :sub:`2` + - b\ :sub:`1` + - b\ :sub:`0` * .. _MEDIA-BUS-FMT-BGR666-1X18: - MEDIA_BUS_FMT_BGR666_1X18 diff --git a/MAINTAINERS b/MAINTAINERS index 0b275b8d6bd2..97f51d5ec1cf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -311,7 +311,7 @@ ACPI COMPONENT ARCHITECTURE (ACPICA) M: Robert Moore <robert.moore@intel.com> M: "Rafael J. 
Wysocki" <rafael.j.wysocki@intel.com> L: linux-acpi@vger.kernel.org -L: acpica-devel@lists.linuxfoundation.org +L: acpica-devel@lists.linux.dev S: Supported W: https://acpica.org/ W: https://github.com/acpica/acpica/ @@ -6534,7 +6534,7 @@ F: drivers/gpu/drm/ast/ DRM DRIVER FOR BOCHS VIRTUAL GPU M: Gerd Hoffmann <kraxel@redhat.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc F: drivers/gpu/drm/tiny/bochs.c @@ -6781,7 +6781,7 @@ F: drivers/gpu/drm/tiny/repaper.c DRM DRIVER FOR QEMU'S CIRRUS DEVICE M: Dave Airlie <airlied@redhat.com> M: Gerd Hoffmann <kraxel@redhat.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Obsolete W: https://www.kraxel.org/blog/2014/10/qemu-using-cirrus-considered-harmful/ T: git git://anongit.freedesktop.org/drm/drm-misc @@ -6790,7 +6790,7 @@ F: drivers/gpu/drm/tiny/cirrus.c DRM DRIVER FOR QXL VIRTUAL GPU M: Dave Airlie <airlied@redhat.com> M: Gerd Hoffmann <kraxel@redhat.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev L: spice-devel@lists.freedesktop.org S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc @@ -7133,7 +7133,7 @@ F: drivers/gpu/host1x/ F: include/linux/host1x.h F: include/uapi/drm/tegra_drm.h -DRM DRIVERS FOR RENESAS +DRM DRIVERS FOR RENESAS R-CAR M: Laurent Pinchart <laurent.pinchart@ideasonboard.com> M: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com> L: dri-devel@lists.freedesktop.org @@ -7144,7 +7144,16 @@ F: Documentation/devicetree/bindings/display/bridge/renesas,dsi-csi2-tx.yaml F: Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.yaml F: Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml F: Documentation/devicetree/bindings/display/renesas,du.yaml -F: drivers/gpu/drm/renesas/ +F: drivers/gpu/drm/renesas/rcar-du/ + +DRM DRIVERS FOR RENESAS SHMOBILE +M: Laurent Pinchart <laurent.pinchart@ideasonboard.com> +M: Geert Uytterhoeven <geert+renesas@glider.be> +L: dri-devel@lists.freedesktop.org +L: linux-renesas-soc@vger.kernel.org +S: Supported +F: Documentation/devicetree/bindings/display/renesas,shmobile-lcdc.yaml +F: drivers/gpu/drm/renesas/shmobile/ F: include/linux/platform_data/shmob_drm.h DRM DRIVERS FOR ROCKCHIP @@ -7880,7 +7889,7 @@ F: drivers/net/can/usb/etas_es58x/ ETHERNET BRIDGE M: Roopa Prabhu <roopa@nvidia.com> M: Nikolay Aleksandrov <razor@blackwall.org> -L: bridge@lists.linux-foundation.org (moderated for non-subscribers) +L: bridge@lists.linux.dev L: netdev@vger.kernel.org S: Maintained W: http://www.linuxfoundation.org/en/Net:Bridge @@ -8156,6 +8165,27 @@ F: include/linux/fs_types.h F: include/uapi/linux/fs.h F: include/uapi/linux/openat2.h +FILESYSTEMS [EXPORTFS] +M: Chuck Lever <chuck.lever@oracle.com> +M: Jeff Layton <jlayton@kernel.org> +R: Amir Goldstein <amir73il@gmail.com> +L: linux-fsdevel@vger.kernel.org +L: linux-nfs@vger.kernel.org +S: Supported +F: Documentation/filesystems/nfs/exporting.rst +F: fs/exportfs/ +F: fs/fhandle.c +F: include/linux/exportfs.h + +FILESYSTEMS [IOMAP] +M: Christian Brauner <brauner@kernel.org> +R: Darrick J. 
Wong <djwong@kernel.org> +L: linux-xfs@vger.kernel.org +L: linux-fsdevel@vger.kernel.org +S: Supported +F: fs/iomap/ +F: include/linux/iomap.h + FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER M: Riku Voipio <riku.voipio@iki.fi> L: linux-hwmon@vger.kernel.org @@ -11076,15 +11106,6 @@ L: linux-mips@vger.kernel.org S: Maintained F: drivers/net/ethernet/sgi/ioc3-eth.c -IOMAP FILESYSTEM LIBRARY -M: Darrick J. Wong <djwong@kernel.org> -L: linux-xfs@vger.kernel.org -L: linux-fsdevel@vger.kernel.org -S: Supported -T: git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git -F: fs/iomap/ -F: include/linux/iomap.h - IOMMU DMA-API LAYER M: Robin Murphy <robin.murphy@arm.com> L: iommu@lists.linux.dev @@ -11548,7 +11569,6 @@ S: Supported W: http://nfs.sourceforge.net/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git F: Documentation/filesystems/nfs/ -F: fs/exportfs/ F: fs/lockd/ F: fs/nfs_common/ F: fs/nfsd/ @@ -16374,7 +16394,7 @@ M: Juergen Gross <jgross@suse.com> R: Ajay Kaher <akaher@vmware.com> R: Alexey Makhalov <amakhalov@vmware.com> R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev L: x86@kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/core @@ -22952,7 +22972,7 @@ VIRTIO AND VHOST VSOCK DRIVER M: Stefan Hajnoczi <stefanha@redhat.com> M: Stefano Garzarella <sgarzare@redhat.com> L: kvm@vger.kernel.org -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev L: netdev@vger.kernel.org S: Maintained F: drivers/vhost/vsock.c @@ -22964,7 +22984,7 @@ F: net/vmw_vsock/virtio_transport_common.c VIRTIO BALLOON M: "Michael S. Tsirkin" <mst@redhat.com> M: David Hildenbrand <david@redhat.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained F: drivers/virtio/virtio_balloon.c F: include/linux/balloon_compaction.h @@ -22976,7 +22996,7 @@ M: "Michael S. Tsirkin" <mst@redhat.com> M: Jason Wang <jasowang@redhat.com> R: Paolo Bonzini <pbonzini@redhat.com> R: Stefan Hajnoczi <stefanha@redhat.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained F: drivers/block/virtio_blk.c F: drivers/scsi/virtio_scsi.c @@ -22985,7 +23005,7 @@ F: include/uapi/linux/virtio_scsi.h VIRTIO CONSOLE DRIVER M: Amit Shah <amit@kernel.org> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained F: drivers/char/virtio_console.c F: include/linux/virtio_console.h @@ -22995,7 +23015,7 @@ VIRTIO CORE AND NET DRIVERS M: "Michael S. 
Tsirkin" <mst@redhat.com> M: Jason Wang <jasowang@redhat.com> R: Xuan Zhuo <xuanzhuo@linux.alibaba.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained F: Documentation/ABI/testing/sysfs-bus-vdpa F: Documentation/ABI/testing/sysfs-class-vduse @@ -23014,7 +23034,7 @@ F: tools/virtio/ VIRTIO CRYPTO DRIVER M: Gonglei <arei.gonglei@huawei.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev L: linux-crypto@vger.kernel.org S: Maintained F: drivers/crypto/virtio/ @@ -23025,7 +23045,7 @@ M: Cornelia Huck <cohuck@redhat.com> M: Halil Pasic <pasic@linux.ibm.com> M: Eric Farman <farman@linux.ibm.com> L: linux-s390@vger.kernel.org -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev L: kvm@vger.kernel.org S: Supported F: arch/s390/include/uapi/asm/virtio-ccw.h @@ -23035,7 +23055,7 @@ VIRTIO FILE SYSTEM M: Vivek Goyal <vgoyal@redhat.com> M: Stefan Hajnoczi <stefanha@redhat.com> M: Miklos Szeredi <miklos@szeredi.hu> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev L: linux-fsdevel@vger.kernel.org S: Supported W: https://virtio-fs.gitlab.io/ @@ -23047,7 +23067,7 @@ VIRTIO GPIO DRIVER M: Enrico Weigelt, metux IT consult <info@metux.net> M: Viresh Kumar <vireshk@kernel.org> L: linux-gpio@vger.kernel.org -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained F: drivers/gpio/gpio-virtio.c F: include/uapi/linux/virtio_gpio.h @@ -23058,7 +23078,7 @@ M: Gerd Hoffmann <kraxel@redhat.com> R: Gurchetan Singh <gurchetansingh@chromium.org> R: Chia-I Wu <olvaffe@gmail.com> L: dri-devel@lists.freedesktop.org -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc F: drivers/gpu/drm/ci/xfails/virtio* @@ -23069,7 +23089,7 @@ VIRTIO HOST (VHOST) M: "Michael S. 
Tsirkin" <mst@redhat.com> M: Jason Wang <jasowang@redhat.com> L: kvm@vger.kernel.org -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev L: netdev@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git @@ -23085,7 +23105,7 @@ M: Jason Wang <jasowang@redhat.com> M: Mike Christie <michael.christie@oracle.com> R: Paolo Bonzini <pbonzini@redhat.com> R: Stefan Hajnoczi <stefanha@redhat.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained F: drivers/vhost/scsi.c @@ -23093,7 +23113,7 @@ VIRTIO I2C DRIVER M: Conghui Chen <conghui.chen@intel.com> M: Viresh Kumar <viresh.kumar@linaro.org> L: linux-i2c@vger.kernel.org -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained F: drivers/i2c/busses/i2c-virtio.c F: include/uapi/linux/virtio_i2c.h @@ -23106,14 +23126,14 @@ F: include/uapi/linux/virtio_input.h VIRTIO IOMMU DRIVER M: Jean-Philippe Brucker <jean-philippe@linaro.org> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained F: drivers/iommu/virtio-iommu.c F: include/uapi/linux/virtio_iommu.h VIRTIO MEM DRIVER M: David Hildenbrand <david@redhat.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained W: https://virtio-mem.gitlab.io/ F: drivers/virtio/virtio_mem.c @@ -23121,7 +23141,7 @@ F: include/uapi/linux/virtio_mem.h VIRTIO PMEM DRIVER M: Pankaj Gupta <pankaj.gupta.linux@gmail.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev S: Maintained F: drivers/nvdimm/nd_virtio.c F: drivers/nvdimm/virtio_pmem.c @@ -23129,7 +23149,7 @@ F: drivers/nvdimm/virtio_pmem.c VIRTIO SOUND DRIVER M: Anton Yakovlev <anton.yakovlev@opensynergy.com> M: "Michael S. 
Tsirkin" <mst@redhat.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained F: include/uapi/linux/virtio_snd.h @@ -23178,16 +23198,9 @@ W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git F: drivers/media/test-drivers/vivid/* -VLYNQ BUS -M: Florian Fainelli <f.fainelli@gmail.com> -L: openwrt-devel@lists.openwrt.org (subscribers-only) -S: Maintained -F: drivers/vlynq/vlynq.c -F: include/linux/vlynq.h - VM SOCKETS (AF_VSOCK) M: Stefano Garzarella <sgarzare@redhat.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev L: netdev@vger.kernel.org S: Maintained F: drivers/net/vsockmon.c @@ -23231,7 +23244,7 @@ VMWARE HYPERVISOR INTERFACE M: Ajay Kaher <akaher@vmware.com> M: Alexey Makhalov <amakhalov@vmware.com> R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> -L: virtualization@lists.linux-foundation.org +L: virtualization@lists.linux.dev L: x86@kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/vmware diff --git a/arch/arc/include/asm/kprobes.h b/arch/arc/include/asm/kprobes.h index de1566e32cb8..68e8301c0df2 100644 --- a/arch/arc/include/asm/kprobes.h +++ b/arch/arc/include/asm/kprobes.h @@ -32,9 +32,6 @@ struct kprobe; void arch_remove_kprobe(struct kprobe *p); -int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); - struct prev_kprobe { struct kprobe *kp; unsigned long status; diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index bb9f0e5b0b63..10fd74bf85f9 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -1076,7 +1076,6 @@ CONFIG_QCOM_IPCC=y CONFIG_OMAP_IOMMU=y CONFIG_OMAP_IOMMU_DEBUG=y CONFIG_ROCKCHIP_IOMMU=y -CONFIG_TEGRA_IOMMU_GART=y CONFIG_TEGRA_IOMMU_SMMU=y CONFIG_EXYNOS_IOMMU=y CONFIG_QCOM_IOMMU=y diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index 23c131b0854b..9e81b1849e4c 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -100,7 +100,6 @@ CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_CONNECTOR=y CONFIG_MTD=y -CONFIG_MTD_AR7_PARTS=m CONFIG_MTD_CMDLINE_PARTS=m CONFIG_MTD_OF_PARTS=m CONFIG_MTD_AFS_PARTS=m diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig index 613f07b8ce15..8635b7216bfc 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -292,7 +292,6 @@ CONFIG_CHROME_PLATFORMS=y CONFIG_CROS_EC=y CONFIG_CROS_EC_I2C=m CONFIG_CROS_EC_SPI=m -CONFIG_TEGRA_IOMMU_GART=y CONFIG_TEGRA_IOMMU_SMMU=y CONFIG_ARCH_TEGRA_2x_SOC=y CONFIG_ARCH_TEGRA_3x_SOC=y diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h index 72529f5e2bed..a41b503b7dcd 100644 --- a/arch/arm/include/asm/arm_pmuv3.h +++ b/arch/arm/include/asm/arm_pmuv3.h @@ -23,6 +23,8 @@ #define PMUSERENR __ACCESS_CP15(c9, 0, c14, 0) #define PMINTENSET __ACCESS_CP15(c9, 0, c14, 1) #define PMINTENCLR __ACCESS_CP15(c9, 0, c14, 2) +#define PMCEID2 __ACCESS_CP15(c9, 0, c14, 4) +#define PMCEID3 __ACCESS_CP15(c9, 0, c14, 5) #define PMMIR __ACCESS_CP15(c9, 0, c14, 6) #define PMCCFILTR __ACCESS_CP15(c14, 0, c15, 7) @@ -150,21 +152,6 @@ static inline u64 read_pmccntr(void) return read_sysreg(PMCCNTR); } -static inline void write_pmxevcntr(u32 val) -{ - write_sysreg(val, PMXEVCNTR); -} - -static inline u32 read_pmxevcntr(void) -{ - return read_sysreg(PMXEVCNTR); -} - -static inline 
void write_pmxevtyper(u32 val) -{ - write_sysreg(val, PMXEVTYPER); -} - static inline void write_pmcntenset(u32 val) { write_sysreg(val, PMCNTENSET); @@ -205,16 +192,6 @@ static inline void write_pmuserenr(u32 val) write_sysreg(val, PMUSERENR); } -static inline u32 read_pmceid0(void) -{ - return read_sysreg(PMCEID0); -} - -static inline u32 read_pmceid1(void) -{ - return read_sysreg(PMCEID1); -} - static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {} static inline void kvm_clr_pmu_events(u32 clr) {} static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr) @@ -231,6 +208,7 @@ static inline void kvm_vcpu_pmu_resync_el0(void) {} /* PMU Version in DFR Register */ #define ARMV8_PMU_DFR_VER_NI 0 +#define ARMV8_PMU_DFR_VER_V3P1 0x4 #define ARMV8_PMU_DFR_VER_V3P4 0x5 #define ARMV8_PMU_DFR_VER_V3P5 0x6 #define ARMV8_PMU_DFR_VER_IMP_DEF 0xF @@ -251,4 +229,24 @@ static inline bool is_pmuv3p5(int pmuver) return pmuver >= ARMV8_PMU_DFR_VER_V3P5; } +static inline u64 read_pmceid0(void) +{ + u64 val = read_sysreg(PMCEID0); + + if (read_pmuver() >= ARMV8_PMU_DFR_VER_V3P1) + val |= (u64)read_sysreg(PMCEID2) << 32; + + return val; +} + +static inline u64 read_pmceid1(void) +{ + u64 val = read_sysreg(PMCEID1); + + if (read_pmuver() >= ARMV8_PMU_DFR_VER_V3P1) + val |= (u64)read_sysreg(PMCEID3) << 32; + + return val; +} + #endif diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h index e26a278d301a..5b8dbf1b0be4 100644 --- a/arch/arm/include/asm/kprobes.h +++ b/arch/arm/include/asm/kprobes.h @@ -40,8 +40,6 @@ struct kprobe_ctlblk { void arch_remove_kprobe(struct kprobe *); int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr); -int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); /* optinsn template addresses */ extern __visible kprobe_opcode_t optprobe_template_entry[]; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index 4a74f3b6d775..bf7de35ffcbc 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -859,7 +859,7 @@ pinctrl-names = "default"; pinctrl-0 = <&scp_pins>; - cros_ec { + cros-ec-rpmsg { compatible = "google,cros-ec-rpmsg"; mediatek,rpmsg-name = "cros-ec-rpmsg"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi index 1447eed0ea36..f2281250ac35 100644 --- a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi @@ -1312,7 +1312,7 @@ pinctrl-names = "default"; pinctrl-0 = <&scp_pins>; - cros-ec { + cros-ec-rpmsg { compatible = "google,cros-ec-rpmsg"; mediatek,rpmsg-name = "cros-ec-rpmsg"; }; diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h index 18dc2fb3d7b7..c27404fa4418 100644 --- a/arch/arm64/include/asm/arm_pmuv3.h +++ b/arch/arm64/include/asm/arm_pmuv3.h @@ -46,12 +46,12 @@ static inline u32 read_pmuver(void) ID_AA64DFR0_EL1_PMUVer_SHIFT); } -static inline void write_pmcr(u32 val) +static inline void write_pmcr(u64 val) { write_sysreg(val, pmcr_el0); } -static inline u32 read_pmcr(void) +static inline u64 read_pmcr(void) { return read_sysreg(pmcr_el0); } @@ -71,21 +71,6 @@ static inline u64 read_pmccntr(void) return read_sysreg(pmccntr_el0); } -static inline void write_pmxevcntr(u32 val) -{ - write_sysreg(val, pmxevcntr_el0); -} - -static inline u32 read_pmxevcntr(void) -{ - return read_sysreg(pmxevcntr_el0); 
-} - -static inline void write_pmxevtyper(u32 val) -{ - write_sysreg(val, pmxevtyper_el0); -} - static inline void write_pmcntenset(u32 val) { write_sysreg(val, pmcntenset_el0); @@ -106,7 +91,7 @@ static inline void write_pmintenclr(u32 val) write_sysreg(val, pmintenclr_el1); } -static inline void write_pmccfiltr(u32 val) +static inline void write_pmccfiltr(u64 val) { write_sysreg(val, pmccfiltr_el0); } @@ -126,12 +111,12 @@ static inline void write_pmuserenr(u32 val) write_sysreg(val, pmuserenr_el0); } -static inline u32 read_pmceid0(void) +static inline u64 read_pmceid0(void) { return read_sysreg(pmceid0_el0); } -static inline u32 read_pmceid1(void) +static inline u64 read_pmceid1(void) { return read_sysreg(pmceid1_el0); } diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h index 05cd82eeca13..be7a3680dadf 100644 --- a/arch/arm64/include/asm/kprobes.h +++ b/arch/arm64/include/asm/kprobes.h @@ -37,8 +37,6 @@ struct kprobe_ctlblk { void arch_remove_kprobe(struct kprobe *); int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr); -int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); void __kretprobe_trampoline(void); void __kprobes *trampoline_probe_handler(struct pt_regs *regs); diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h index 17f687510c48..d977713ec0ba 100644 --- a/arch/arm64/include/asm/syscall_wrapper.h +++ b/arch/arm64/include/asm/syscall_wrapper.h @@ -54,7 +54,6 @@ ALLOW_ERROR_INJECTION(__arm64_sys##name, ERRNO); \ static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ - asmlinkage long __arm64_sys##name(const struct pt_regs *regs); \ asmlinkage long __arm64_sys##name(const struct pt_regs *regs) \ { \ return __se_sys##name(SC_ARM64_REGS_TO_ARGS(x,__VA_ARGS__)); \ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f6b2e2906fc9..646591c67e7a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -999,6 +999,37 @@ static void init_32bit_cpu_features(struct cpuinfo_32bit *info) init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); } +#ifdef CONFIG_ARM64_PSEUDO_NMI +static bool enable_pseudo_nmi; + +static int __init early_enable_pseudo_nmi(char *p) +{ + return kstrtobool(p, &enable_pseudo_nmi); +} +early_param("irqchip.gicv3_pseudo_nmi", early_enable_pseudo_nmi); + +static __init void detect_system_supports_pseudo_nmi(void) +{ + struct device_node *np; + + if (!enable_pseudo_nmi) + return; + + /* + * Detect broken MediaTek firmware that doesn't properly save and + * restore GIC priorities. + */ + np = of_find_compatible_node(NULL, NULL, "arm,gic-v3"); + if (np && of_property_read_bool(np, "mediatek,broken-save-restore-fw")) { + pr_info("Pseudo-NMI disabled due to MediaTek Chromebook GICR save problem\n"); + enable_pseudo_nmi = false; + } + of_node_put(np); +} +#else /* CONFIG_ARM64_PSEUDO_NMI */ +static inline void detect_system_supports_pseudo_nmi(void) { } +#endif + void __init init_cpu_features(struct cpuinfo_arm64 *info) { /* Before we start using the tables, make sure it is sorted */ @@ -1058,6 +1089,13 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpucap_indirect_list(); /* + * Detect broken pseudo-NMI. Must be called _before_ the call to + * setup_boot_cpu_capabilities() since it interacts with + * can_use_gic_priorities(). 
+ */ + detect_system_supports_pseudo_nmi(); + + /* * Detect and enable early CPU capabilities based on the boot CPU, * after we have initialised the CPU feature infrastructure. */ @@ -2085,14 +2123,6 @@ static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap) #endif /* CONFIG_ARM64_E0PD */ #ifdef CONFIG_ARM64_PSEUDO_NMI -static bool enable_pseudo_nmi; - -static int __init early_enable_pseudo_nmi(char *p) -{ - return kstrtobool(p, &enable_pseudo_nmi); -} -early_param("irqchip.gicv3_pseudo_nmi", early_enable_pseudo_nmi); - static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry, int scope) { diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index be95b523c101..defbab84e9e5 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -965,10 +965,7 @@ static void smp_cross_call(const struct cpumask *target, unsigned int ipinr) static bool ipi_should_be_nmi(enum ipi_msg_type ipi) { - DECLARE_STATIC_KEY_FALSE(supports_pseudo_nmis); - - if (!system_uses_irq_prio_masking() || - !static_branch_likely(&supports_pseudo_nmis)) + if (!system_uses_irq_prio_masking()) return false; switch (ipi) { diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index d889a0b97bc1..ee123820a476 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -136,6 +136,7 @@ config LOONGARCH select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_PREEMPT_DYNAMIC_KEY select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RETHOOK select HAVE_RSEQ diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index b86f2ff31659..9eeb0c05f3f4 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -68,6 +68,8 @@ LDFLAGS_vmlinux += -static -n -nostdlib ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS cflags-y += $(call cc-option,-mexplicit-relocs) KBUILD_CFLAGS_KERNEL += $(call cc-option,-mdirect-extern-access) +KBUILD_AFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data) +KBUILD_CFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data) KBUILD_AFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax) KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax) else diff --git a/arch/loongarch/include/asm/atomic.h b/arch/loongarch/include/asm/atomic.h index e27f0c72d324..99af8b3160a8 100644 --- a/arch/loongarch/include/asm/atomic.h +++ b/arch/loongarch/include/asm/atomic.h @@ -36,19 +36,19 @@ static inline void arch_atomic_##op(int i, atomic_t *v) \ { \ __asm__ __volatile__( \ - "am"#asm_op"_db.w" " $zero, %1, %0 \n" \ + "am"#asm_op".w" " $zero, %1, %0 \n" \ : "+ZB" (v->counter) \ : "r" (I) \ : "memory"); \ } -#define ATOMIC_OP_RETURN(op, I, asm_op, c_op) \ -static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \ +#define ATOMIC_OP_RETURN(op, I, asm_op, c_op, mb, suffix) \ +static inline int arch_atomic_##op##_return##suffix(int i, atomic_t *v) \ { \ int result; \ \ __asm__ __volatile__( \ - "am"#asm_op"_db.w" " %1, %2, %0 \n" \ + "am"#asm_op#mb".w" " %1, %2, %0 \n" \ : "+ZB" (v->counter), "=&r" (result) \ : "r" (I) \ : "memory"); \ @@ -56,13 +56,13 @@ static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \ return result c_op I; \ } -#define ATOMIC_FETCH_OP(op, I, asm_op) \ -static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ +#define ATOMIC_FETCH_OP(op, I, asm_op, mb, suffix) \ +static inline int arch_atomic_fetch_##op##suffix(int i, atomic_t *v) \ { \ int result; \ \ __asm__ 
__volatile__( \ - "am"#asm_op"_db.w" " %1, %2, %0 \n" \ + "am"#asm_op#mb".w" " %1, %2, %0 \n" \ : "+ZB" (v->counter), "=&r" (result) \ : "r" (I) \ : "memory"); \ @@ -72,29 +72,53 @@ static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ #define ATOMIC_OPS(op, I, asm_op, c_op) \ ATOMIC_OP(op, I, asm_op) \ - ATOMIC_OP_RETURN(op, I, asm_op, c_op) \ - ATOMIC_FETCH_OP(op, I, asm_op) + ATOMIC_OP_RETURN(op, I, asm_op, c_op, _db, ) \ + ATOMIC_OP_RETURN(op, I, asm_op, c_op, , _relaxed) \ + ATOMIC_FETCH_OP(op, I, asm_op, _db, ) \ + ATOMIC_FETCH_OP(op, I, asm_op, , _relaxed) ATOMIC_OPS(add, i, add, +) ATOMIC_OPS(sub, -i, add, +) +#define arch_atomic_add_return arch_atomic_add_return +#define arch_atomic_add_return_acquire arch_atomic_add_return +#define arch_atomic_add_return_release arch_atomic_add_return #define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed +#define arch_atomic_sub_return arch_atomic_sub_return +#define arch_atomic_sub_return_acquire arch_atomic_sub_return +#define arch_atomic_sub_return_release arch_atomic_sub_return #define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed +#define arch_atomic_fetch_add arch_atomic_fetch_add +#define arch_atomic_fetch_add_acquire arch_atomic_fetch_add +#define arch_atomic_fetch_add_release arch_atomic_fetch_add #define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed +#define arch_atomic_fetch_sub arch_atomic_fetch_sub +#define arch_atomic_fetch_sub_acquire arch_atomic_fetch_sub +#define arch_atomic_fetch_sub_release arch_atomic_fetch_sub #define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed #undef ATOMIC_OPS #define ATOMIC_OPS(op, I, asm_op) \ ATOMIC_OP(op, I, asm_op) \ - ATOMIC_FETCH_OP(op, I, asm_op) + ATOMIC_FETCH_OP(op, I, asm_op, _db, ) \ + ATOMIC_FETCH_OP(op, I, asm_op, , _relaxed) ATOMIC_OPS(and, i, and) ATOMIC_OPS(or, i, or) ATOMIC_OPS(xor, i, xor) +#define arch_atomic_fetch_and arch_atomic_fetch_and +#define arch_atomic_fetch_and_acquire arch_atomic_fetch_and +#define arch_atomic_fetch_and_release arch_atomic_fetch_and #define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed +#define arch_atomic_fetch_or arch_atomic_fetch_or +#define arch_atomic_fetch_or_acquire arch_atomic_fetch_or +#define arch_atomic_fetch_or_release arch_atomic_fetch_or #define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed +#define arch_atomic_fetch_xor arch_atomic_fetch_xor +#define arch_atomic_fetch_xor_acquire arch_atomic_fetch_xor +#define arch_atomic_fetch_xor_release arch_atomic_fetch_xor #define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed #undef ATOMIC_OPS @@ -172,18 +196,18 @@ static inline int arch_atomic_sub_if_positive(int i, atomic_t *v) static inline void arch_atomic64_##op(long i, atomic64_t *v) \ { \ __asm__ __volatile__( \ - "am"#asm_op"_db.d " " $zero, %1, %0 \n" \ + "am"#asm_op".d " " $zero, %1, %0 \n" \ : "+ZB" (v->counter) \ : "r" (I) \ : "memory"); \ } -#define ATOMIC64_OP_RETURN(op, I, asm_op, c_op) \ -static inline long arch_atomic64_##op##_return_relaxed(long i, atomic64_t *v) \ +#define ATOMIC64_OP_RETURN(op, I, asm_op, c_op, mb, suffix) \ +static inline long arch_atomic64_##op##_return##suffix(long i, atomic64_t *v) \ { \ long result; \ __asm__ __volatile__( \ - "am"#asm_op"_db.d " " %1, %2, %0 \n" \ + "am"#asm_op#mb".d " " %1, %2, %0 \n" \ : "+ZB" (v->counter), "=&r" (result) \ : "r" (I) \ : "memory"); \ @@ -191,13 +215,13 @@ static inline long arch_atomic64_##op##_return_relaxed(long i, atomic64_t *v) \ return result c_op I; \ } 
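(Annotation, not part of the patch.) The extra mb/suffix parameters added above let a single macro body generate two flavours of each operation: passing _db selects the LoongArch AMO forms that carry a full data barrier (amadd_db.w, amadd_db.d, ...), while passing an empty mb together with the _relaxed suffix selects the plain, unordered forms. Hand-expanding ATOMIC_OP_RETURN(add, i, add, +, _db, ) and ATOMIC_OP_RETURN(add, i, add, +, , _relaxed) from the 32-bit hunk earlier in this file gives roughly:

	static inline int arch_atomic_add_return(int i, atomic_t *v)
	{
		int result;

		__asm__ __volatile__(
		"amadd_db.w %1, %2, %0\n"	/* _db form: AMO with full barrier */
		: "+ZB" (v->counter), "=&r" (result)
		: "r" (i)
		: "memory");

		return result + i;
	}

	static inline int arch_atomic_add_return_relaxed(int i, atomic_t *v)
	{
		int result;

		__asm__ __volatile__(
		"amadd.w %1, %2, %0\n"		/* plain form: no ordering implied */
		: "+ZB" (v->counter), "=&r" (result)
		: "r" (i)
		: "memory");

		return result + i;
	}

The acquire/release names are then simply #defined to the fully ordered variant, a safe over-approximation, so generic code can pick the cheaper _relaxed form wherever weaker ordering suffices. The ATOMIC64_* macros below apply the same scheme to the 64-bit .d instructions.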
-#define ATOMIC64_FETCH_OP(op, I, asm_op) \ -static inline long arch_atomic64_fetch_##op##_relaxed(long i, atomic64_t *v) \ +#define ATOMIC64_FETCH_OP(op, I, asm_op, mb, suffix) \ +static inline long arch_atomic64_fetch_##op##suffix(long i, atomic64_t *v) \ { \ long result; \ \ __asm__ __volatile__( \ - "am"#asm_op"_db.d " " %1, %2, %0 \n" \ + "am"#asm_op#mb".d " " %1, %2, %0 \n" \ : "+ZB" (v->counter), "=&r" (result) \ : "r" (I) \ : "memory"); \ @@ -207,29 +231,53 @@ static inline long arch_atomic64_fetch_##op##_relaxed(long i, atomic64_t *v) \ #define ATOMIC64_OPS(op, I, asm_op, c_op) \ ATOMIC64_OP(op, I, asm_op) \ - ATOMIC64_OP_RETURN(op, I, asm_op, c_op) \ - ATOMIC64_FETCH_OP(op, I, asm_op) + ATOMIC64_OP_RETURN(op, I, asm_op, c_op, _db, ) \ + ATOMIC64_OP_RETURN(op, I, asm_op, c_op, , _relaxed) \ + ATOMIC64_FETCH_OP(op, I, asm_op, _db, ) \ + ATOMIC64_FETCH_OP(op, I, asm_op, , _relaxed) ATOMIC64_OPS(add, i, add, +) ATOMIC64_OPS(sub, -i, add, +) +#define arch_atomic64_add_return arch_atomic64_add_return +#define arch_atomic64_add_return_acquire arch_atomic64_add_return +#define arch_atomic64_add_return_release arch_atomic64_add_return #define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed +#define arch_atomic64_sub_return arch_atomic64_sub_return +#define arch_atomic64_sub_return_acquire arch_atomic64_sub_return +#define arch_atomic64_sub_return_release arch_atomic64_sub_return #define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed +#define arch_atomic64_fetch_add arch_atomic64_fetch_add +#define arch_atomic64_fetch_add_acquire arch_atomic64_fetch_add +#define arch_atomic64_fetch_add_release arch_atomic64_fetch_add #define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed +#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub +#define arch_atomic64_fetch_sub_acquire arch_atomic64_fetch_sub +#define arch_atomic64_fetch_sub_release arch_atomic64_fetch_sub #define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed #undef ATOMIC64_OPS #define ATOMIC64_OPS(op, I, asm_op) \ ATOMIC64_OP(op, I, asm_op) \ - ATOMIC64_FETCH_OP(op, I, asm_op) + ATOMIC64_FETCH_OP(op, I, asm_op, _db, ) \ + ATOMIC64_FETCH_OP(op, I, asm_op, , _relaxed) ATOMIC64_OPS(and, i, and) ATOMIC64_OPS(or, i, or) ATOMIC64_OPS(xor, i, xor) +#define arch_atomic64_fetch_and arch_atomic64_fetch_and +#define arch_atomic64_fetch_and_acquire arch_atomic64_fetch_and +#define arch_atomic64_fetch_and_release arch_atomic64_fetch_and #define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed +#define arch_atomic64_fetch_or arch_atomic64_fetch_or +#define arch_atomic64_fetch_or_acquire arch_atomic64_fetch_or +#define arch_atomic64_fetch_or_release arch_atomic64_fetch_or #define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed +#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor +#define arch_atomic64_fetch_xor_acquire arch_atomic64_fetch_xor +#define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor #define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed #undef ATOMIC64_OPS diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 008a88ead60d..d8f637f9e400 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -65,6 +65,8 @@ enum reg2_op { revbd_op = 0x0f, revh2w_op = 0x10, revhd_op = 0x11, + extwh_op = 0x16, + extwb_op = 0x17, iocsrrdb_op = 0x19200, iocsrrdh_op = 0x19201, iocsrrdw_op = 0x19202, @@ -572,6 +574,8 @@ static inline void emit_##NAME(union 
loongarch_instruction *insn, \ DEF_EMIT_REG2_FORMAT(revb2h, revb2h_op) DEF_EMIT_REG2_FORMAT(revb2w, revb2w_op) DEF_EMIT_REG2_FORMAT(revbd, revbd_op) +DEF_EMIT_REG2_FORMAT(extwh, extwh_op) +DEF_EMIT_REG2_FORMAT(extwb, extwb_op) #define DEF_EMIT_REG2I5_FORMAT(NAME, OP) \ static inline void emit_##NAME(union loongarch_instruction *insn, \ @@ -623,6 +627,9 @@ DEF_EMIT_REG2I12_FORMAT(lu52id, lu52id_op) DEF_EMIT_REG2I12_FORMAT(andi, andi_op) DEF_EMIT_REG2I12_FORMAT(ori, ori_op) DEF_EMIT_REG2I12_FORMAT(xori, xori_op) +DEF_EMIT_REG2I12_FORMAT(ldb, ldb_op) +DEF_EMIT_REG2I12_FORMAT(ldh, ldh_op) +DEF_EMIT_REG2I12_FORMAT(ldw, ldw_op) DEF_EMIT_REG2I12_FORMAT(ldbu, ldbu_op) DEF_EMIT_REG2I12_FORMAT(ldhu, ldhu_op) DEF_EMIT_REG2I12_FORMAT(ldwu, ldwu_op) @@ -701,9 +708,12 @@ static inline void emit_##NAME(union loongarch_instruction *insn, \ insn->reg3_format.rk = rk; \ } +DEF_EMIT_REG3_FORMAT(addw, addw_op) DEF_EMIT_REG3_FORMAT(addd, addd_op) DEF_EMIT_REG3_FORMAT(subd, subd_op) DEF_EMIT_REG3_FORMAT(muld, muld_op) +DEF_EMIT_REG3_FORMAT(divd, divd_op) +DEF_EMIT_REG3_FORMAT(modd, modd_op) DEF_EMIT_REG3_FORMAT(divdu, divdu_op) DEF_EMIT_REG3_FORMAT(moddu, moddu_op) DEF_EMIT_REG3_FORMAT(and, and_op) @@ -715,6 +725,9 @@ DEF_EMIT_REG3_FORMAT(srlw, srlw_op) DEF_EMIT_REG3_FORMAT(srld, srld_op) DEF_EMIT_REG3_FORMAT(sraw, sraw_op) DEF_EMIT_REG3_FORMAT(srad, srad_op) +DEF_EMIT_REG3_FORMAT(ldxb, ldxb_op) +DEF_EMIT_REG3_FORMAT(ldxh, ldxh_op) +DEF_EMIT_REG3_FORMAT(ldxw, ldxw_op) DEF_EMIT_REG3_FORMAT(ldxbu, ldxbu_op) DEF_EMIT_REG3_FORMAT(ldxhu, ldxhu_op) DEF_EMIT_REG3_FORMAT(ldxwu, ldxwu_op) diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index b9f567e66016..ed5da02b1cf6 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -32,7 +32,7 @@ static inline void set_my_cpu_offset(unsigned long off) #define __my_cpu_offset __my_cpu_offset #define PERCPU_OP(op, asm_op, c_op) \ -static inline unsigned long __percpu_##op(void *ptr, \ +static __always_inline unsigned long __percpu_##op(void *ptr, \ unsigned long val, int size) \ { \ unsigned long ret; \ @@ -63,7 +63,7 @@ PERCPU_OP(and, and, &) PERCPU_OP(or, or, |) #undef PERCPU_OP -static inline unsigned long __percpu_read(void *ptr, int size) +static __always_inline unsigned long __percpu_read(void *ptr, int size) { unsigned long ret; @@ -100,7 +100,7 @@ static inline unsigned long __percpu_read(void *ptr, int size) return ret; } -static inline void __percpu_write(void *ptr, unsigned long val, int size) +static __always_inline void __percpu_write(void *ptr, unsigned long val, int size) { switch (size) { case 1: @@ -132,8 +132,8 @@ static inline void __percpu_write(void *ptr, unsigned long val, int size) } } -static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, - int size) +static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, + int size) { switch (size) { case 1: diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index ef35c871244f..5bca12d16e06 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -504,8 +504,9 @@ asmlinkage void start_secondary(void) unsigned int cpu; sync_counter(); - cpu = smp_processor_id(); + cpu = raw_smp_processor_id(); set_my_cpu_offset(per_cpu_offset(cpu)); + rcutree_report_cpu_starting(cpu); cpu_probe(); constant_clockevent_init(); diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index db9342b2d0e6..169ff8b3915e 100644 --- a/arch/loongarch/net/bpf_jit.c +++ 
b/arch/loongarch/net/bpf_jit.c @@ -411,7 +411,11 @@ static int add_exception_handler(const struct bpf_insn *insn, off_t offset; struct exception_table_entry *ex; - if (!ctx->image || !ctx->prog->aux->extable || BPF_MODE(insn->code) != BPF_PROBE_MEM) + if (!ctx->image || !ctx->prog->aux->extable) + return 0; + + if (BPF_MODE(insn->code) != BPF_PROBE_MEM && + BPF_MODE(insn->code) != BPF_PROBE_MEMSX) return 0; if (WARN_ON_ONCE(ctx->num_exentries >= ctx->prog->aux->num_exentries)) @@ -450,7 +454,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext { u8 tm = -1; u64 func_addr; - bool func_addr_fixed; + bool func_addr_fixed, sign_extend; int i = insn - ctx->prog->insnsi; int ret, jmp_offset; const u8 code = insn->code; @@ -468,8 +472,23 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext /* dst = src */ case BPF_ALU | BPF_MOV | BPF_X: case BPF_ALU64 | BPF_MOV | BPF_X: - move_reg(ctx, dst, src); - emit_zext_32(ctx, dst, is32); + switch (off) { + case 0: + move_reg(ctx, dst, src); + emit_zext_32(ctx, dst, is32); + break; + case 8: + move_reg(ctx, t1, src); + emit_insn(ctx, extwb, dst, t1); + break; + case 16: + move_reg(ctx, t1, src); + emit_insn(ctx, extwh, dst, t1); + break; + case 32: + emit_insn(ctx, addw, dst, src, LOONGARCH_GPR_ZERO); + break; + } break; /* dst = imm */ @@ -534,39 +553,71 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext /* dst = dst / src */ case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X: - emit_zext_32(ctx, dst, is32); - move_reg(ctx, t1, src); - emit_zext_32(ctx, t1, is32); - emit_insn(ctx, divdu, dst, dst, t1); - emit_zext_32(ctx, dst, is32); + if (!off) { + emit_zext_32(ctx, dst, is32); + move_reg(ctx, t1, src); + emit_zext_32(ctx, t1, is32); + emit_insn(ctx, divdu, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + } else { + emit_sext_32(ctx, dst, is32); + move_reg(ctx, t1, src); + emit_sext_32(ctx, t1, is32); + emit_insn(ctx, divd, dst, dst, t1); + emit_sext_32(ctx, dst, is32); + } break; /* dst = dst / imm */ case BPF_ALU | BPF_DIV | BPF_K: case BPF_ALU64 | BPF_DIV | BPF_K: - move_imm(ctx, t1, imm, is32); - emit_zext_32(ctx, dst, is32); - emit_insn(ctx, divdu, dst, dst, t1); - emit_zext_32(ctx, dst, is32); + if (!off) { + move_imm(ctx, t1, imm, is32); + emit_zext_32(ctx, dst, is32); + emit_insn(ctx, divdu, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + } else { + move_imm(ctx, t1, imm, false); + emit_sext_32(ctx, t1, is32); + emit_sext_32(ctx, dst, is32); + emit_insn(ctx, divd, dst, dst, t1); + emit_sext_32(ctx, dst, is32); + } break; /* dst = dst % src */ case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_X: - emit_zext_32(ctx, dst, is32); - move_reg(ctx, t1, src); - emit_zext_32(ctx, t1, is32); - emit_insn(ctx, moddu, dst, dst, t1); - emit_zext_32(ctx, dst, is32); + if (!off) { + emit_zext_32(ctx, dst, is32); + move_reg(ctx, t1, src); + emit_zext_32(ctx, t1, is32); + emit_insn(ctx, moddu, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + } else { + emit_sext_32(ctx, dst, is32); + move_reg(ctx, t1, src); + emit_sext_32(ctx, t1, is32); + emit_insn(ctx, modd, dst, dst, t1); + emit_sext_32(ctx, dst, is32); + } break; /* dst = dst % imm */ case BPF_ALU | BPF_MOD | BPF_K: case BPF_ALU64 | BPF_MOD | BPF_K: - move_imm(ctx, t1, imm, is32); - emit_zext_32(ctx, dst, is32); - emit_insn(ctx, moddu, dst, dst, t1); - emit_zext_32(ctx, dst, is32); + if (!off) { + move_imm(ctx, t1, imm, is32); + emit_zext_32(ctx, dst, is32); + emit_insn(ctx, moddu, dst, 
dst, t1); + emit_zext_32(ctx, dst, is32); + } else { + move_imm(ctx, t1, imm, false); + emit_sext_32(ctx, t1, is32); + emit_sext_32(ctx, dst, is32); + emit_insn(ctx, modd, dst, dst, t1); + emit_sext_32(ctx, dst, is32); + } break; /* dst = -dst */ @@ -712,6 +763,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext break; case BPF_ALU | BPF_END | BPF_FROM_BE: + case BPF_ALU64 | BPF_END | BPF_FROM_LE: switch (imm) { case 16: emit_insn(ctx, revb2h, dst, dst); @@ -828,7 +880,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext /* PC += off */ case BPF_JMP | BPF_JA: - jmp_offset = bpf2la_offset(i, off, ctx); + case BPF_JMP32 | BPF_JA: + if (BPF_CLASS(code) == BPF_JMP) + jmp_offset = bpf2la_offset(i, off, ctx); + else + jmp_offset = bpf2la_offset(i, imm, ctx); if (emit_uncond_jmp(ctx, jmp_offset) < 0) goto toofar; break; @@ -879,31 +935,56 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext case BPF_LDX | BPF_PROBE_MEM | BPF_W: case BPF_LDX | BPF_PROBE_MEM | BPF_H: case BPF_LDX | BPF_PROBE_MEM | BPF_B: + /* dst_reg = (s64)*(signed size *)(src_reg + off) */ + case BPF_LDX | BPF_MEMSX | BPF_B: + case BPF_LDX | BPF_MEMSX | BPF_H: + case BPF_LDX | BPF_MEMSX | BPF_W: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: + sign_extend = BPF_MODE(insn->code) == BPF_MEMSX || + BPF_MODE(insn->code) == BPF_PROBE_MEMSX; switch (BPF_SIZE(code)) { case BPF_B: if (is_signed_imm12(off)) { - emit_insn(ctx, ldbu, dst, src, off); + if (sign_extend) + emit_insn(ctx, ldb, dst, src, off); + else + emit_insn(ctx, ldbu, dst, src, off); } else { move_imm(ctx, t1, off, is32); - emit_insn(ctx, ldxbu, dst, src, t1); + if (sign_extend) + emit_insn(ctx, ldxb, dst, src, t1); + else + emit_insn(ctx, ldxbu, dst, src, t1); } break; case BPF_H: if (is_signed_imm12(off)) { - emit_insn(ctx, ldhu, dst, src, off); + if (sign_extend) + emit_insn(ctx, ldh, dst, src, off); + else + emit_insn(ctx, ldhu, dst, src, off); } else { move_imm(ctx, t1, off, is32); - emit_insn(ctx, ldxhu, dst, src, t1); + if (sign_extend) + emit_insn(ctx, ldxh, dst, src, t1); + else + emit_insn(ctx, ldxhu, dst, src, t1); } break; case BPF_W: if (is_signed_imm12(off)) { - emit_insn(ctx, ldwu, dst, src, off); - } else if (is_signed_imm14(off)) { - emit_insn(ctx, ldptrw, dst, src, off); + if (sign_extend) + emit_insn(ctx, ldw, dst, src, off); + else + emit_insn(ctx, ldwu, dst, src, off); } else { move_imm(ctx, t1, off, is32); - emit_insn(ctx, ldxwu, dst, src, t1); + if (sign_extend) + emit_insn(ctx, ldxw, dst, src, t1); + else + emit_insn(ctx, ldxwu, dst, src, t1); } break; case BPF_DW: diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms index caad195ba5c1..a2311c4bce6a 100644 --- a/arch/mips/Kbuild.platforms +++ b/arch/mips/Kbuild.platforms @@ -2,7 +2,6 @@ # All platforms listed in alphabetic order platform-$(CONFIG_MIPS_ALCHEMY) += alchemy/ -platform-$(CONFIG_AR7) += ar7/ platform-$(CONFIG_ATH25) += ath25/ platform-$(CONFIG_ATH79) += ath79/ platform-$(CONFIG_BCM47XX) += bcm47xx/ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index bc8421859006..76db82542519 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -202,28 +202,6 @@ config MIPS_ALCHEMY select SYS_SUPPORTS_ZBOOT select COMMON_CLK -config AR7 - bool "Texas Instruments AR7" - select BOOT_ELF32 - select COMMON_CLK - select DMA_NONCOHERENT - select CEVT_R4K - select CSRC_R4K - select IRQ_MIPS_CPU - select 
NO_EXCEPT_FILL - select SWAP_IO_SPACE - select SYS_HAS_CPU_MIPS32_R1 - select SYS_HAS_EARLY_PRINTK - select SYS_SUPPORTS_32BIT_KERNEL - select SYS_SUPPORTS_LITTLE_ENDIAN - select SYS_SUPPORTS_MIPS16 - select SYS_SUPPORTS_ZBOOT_UART16550 - select GPIOLIB - select VLYNQ - help - Support for the Texas Instruments AR7 System-on-a-Chip - family: TNETD7100, 7200 and 7300. - config ATH25 bool "Atheros AR231x/AR531x SoC support" select CEVT_R4K diff --git a/arch/mips/ar7/Makefile b/arch/mips/ar7/Makefile deleted file mode 100644 index cd51c6c6e686..000000000000 --- a/arch/mips/ar7/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -obj-y := \ - prom.o \ - setup.o \ - memory.o \ - irq.o \ - time.o \ - platform.o \ - gpio.o \ - clock.o diff --git a/arch/mips/ar7/Platform b/arch/mips/ar7/Platform deleted file mode 100644 index a9257cc01c3c..000000000000 --- a/arch/mips/ar7/Platform +++ /dev/null @@ -1,5 +0,0 @@ -# -# Texas Instruments AR7 -# -cflags-$(CONFIG_AR7) += -I$(srctree)/arch/mips/include/asm/mach-ar7 -load-$(CONFIG_AR7) += 0xffffffff94100000 diff --git a/arch/mips/ar7/clock.c b/arch/mips/ar7/clock.c deleted file mode 100644 index c717acbc5506..000000000000 --- a/arch/mips/ar7/clock.c +++ /dev/null @@ -1,439 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2007 Felix Fietkau <nbd@openwrt.org> - * Copyright (C) 2007 Eugene Konev <ejka@openwrt.org> - * Copyright (C) 2009 Florian Fainelli <florian@openwrt.org> - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/types.h> -#include <linux/export.h> -#include <linux/delay.h> -#include <linux/gcd.h> -#include <linux/io.h> -#include <linux/err.h> -#include <linux/clkdev.h> -#include <linux/clk.h> -#include <linux/clk-provider.h> - -#include <asm/addrspace.h> -#include <asm/mach-ar7/ar7.h> - -#define BOOT_PLL_SOURCE_MASK 0x3 -#define CPU_PLL_SOURCE_SHIFT 16 -#define BUS_PLL_SOURCE_SHIFT 14 -#define USB_PLL_SOURCE_SHIFT 18 -#define DSP_PLL_SOURCE_SHIFT 22 -#define BOOT_PLL_SOURCE_AFE 0 -#define BOOT_PLL_SOURCE_BUS 0 -#define BOOT_PLL_SOURCE_REF 1 -#define BOOT_PLL_SOURCE_XTAL 2 -#define BOOT_PLL_SOURCE_CPU 3 -#define BOOT_PLL_BYPASS 0x00000020 -#define BOOT_PLL_ASYNC_MODE 0x02000000 -#define BOOT_PLL_2TO1_MODE 0x00008000 - -#define TNETD7200_CLOCK_ID_CPU 0 -#define TNETD7200_CLOCK_ID_DSP 1 -#define TNETD7200_CLOCK_ID_USB 2 - -#define TNETD7200_DEF_CPU_CLK 211000000 -#define TNETD7200_DEF_DSP_CLK 125000000 -#define TNETD7200_DEF_USB_CLK 48000000 - -struct tnetd7300_clock { - u32 ctrl; -#define PREDIV_MASK 0x001f0000 -#define PREDIV_SHIFT 16 -#define POSTDIV_MASK 0x0000001f - u32 unused1[3]; - u32 pll; -#define MUL_MASK 0x0000f000 -#define MUL_SHIFT 12 -#define PLL_MODE_MASK 0x00000001 -#define PLL_NDIV 0x00000800 -#define PLL_DIV 0x00000002 -#define PLL_STATUS 0x00000001 - u32 unused2[3]; -}; - -struct tnetd7300_clocks { - struct tnetd7300_clock bus; - struct tnetd7300_clock cpu; - struct tnetd7300_clock usb; - struct tnetd7300_clock dsp; -}; - -struct tnetd7200_clock { - u32 ctrl; - u32 unused1[3]; -#define DIVISOR_ENABLE_MASK 0x00008000 - u32 mul; - u32 prediv; - u32 postdiv; - u32 postdiv2; - u32 unused2[6]; - u32 cmd; - u32 status; - u32 cmden; - u32 padding[15]; -}; - -struct tnetd7200_clocks { - struct tnetd7200_clock cpu; - struct tnetd7200_clock dsp; - struct tnetd7200_clock usb; -}; - -struct clk_rate { - u32 rate; -}; -static struct clk_rate bus_clk = { - .rate = 125000000, -}; - -static struct clk_rate cpu_clk = { - .rate = 150000000, -}; - -static void 
approximate(int base, int target, int *prediv, - int *postdiv, int *mul) -{ - int i, j, k, freq, res = target; - for (i = 1; i <= 16; i++) - for (j = 1; j <= 32; j++) - for (k = 1; k <= 32; k++) { - freq = abs(base / j * i / k - target); - if (freq < res) { - res = freq; - *mul = i; - *prediv = j; - *postdiv = k; - } - } -} - -static void calculate(int base, int target, int *prediv, int *postdiv, - int *mul) -{ - int tmp_gcd, tmp_base, tmp_freq; - - for (*prediv = 1; *prediv <= 32; (*prediv)++) { - tmp_base = base / *prediv; - tmp_gcd = gcd(target, tmp_base); - *mul = target / tmp_gcd; - *postdiv = tmp_base / tmp_gcd; - if ((*mul < 1) || (*mul >= 16)) - continue; - if ((*postdiv > 0) & (*postdiv <= 32)) - break; - } - - if (base / *prediv * *mul / *postdiv != target) { - approximate(base, target, prediv, postdiv, mul); - tmp_freq = base / *prediv * *mul / *postdiv; - printk(KERN_WARNING - "Adjusted requested frequency %d to %d\n", - target, tmp_freq); - } - - printk(KERN_DEBUG "Clocks: prediv: %d, postdiv: %d, mul: %d\n", - *prediv, *postdiv, *mul); -} - -static int tnetd7300_dsp_clock(void) -{ - u32 didr1, didr2; - u8 rev = ar7_chip_rev(); - didr1 = readl((void *)KSEG1ADDR(AR7_REGS_GPIO + 0x18)); - didr2 = readl((void *)KSEG1ADDR(AR7_REGS_GPIO + 0x1c)); - if (didr2 & (1 << 23)) - return 0; - if ((rev >= 0x23) && (rev != 0x57)) - return 250000000; - if ((((didr2 & 0x1fff) << 10) | ((didr1 & 0xffc00000) >> 22)) - > 4208000) - return 250000000; - return 0; -} - -static int tnetd7300_get_clock(u32 shift, struct tnetd7300_clock *clock, - u32 *bootcr, u32 bus_clock) -{ - int product; - int base_clock = AR7_REF_CLOCK; - u32 ctrl = readl(&clock->ctrl); - u32 pll = readl(&clock->pll); - int prediv = ((ctrl & PREDIV_MASK) >> PREDIV_SHIFT) + 1; - int postdiv = (ctrl & POSTDIV_MASK) + 1; - int divisor = prediv * postdiv; - int mul = ((pll & MUL_MASK) >> MUL_SHIFT) + 1; - - switch ((*bootcr & (BOOT_PLL_SOURCE_MASK << shift)) >> shift) { - case BOOT_PLL_SOURCE_BUS: - base_clock = bus_clock; - break; - case BOOT_PLL_SOURCE_REF: - base_clock = AR7_REF_CLOCK; - break; - case BOOT_PLL_SOURCE_XTAL: - base_clock = AR7_XTAL_CLOCK; - break; - case BOOT_PLL_SOURCE_CPU: - base_clock = cpu_clk.rate; - break; - } - - if (*bootcr & BOOT_PLL_BYPASS) - return base_clock / divisor; - - if ((pll & PLL_MODE_MASK) == 0) - return (base_clock >> (mul / 16 + 1)) / divisor; - - if ((pll & (PLL_NDIV | PLL_DIV)) == (PLL_NDIV | PLL_DIV)) { - product = (mul & 1) ? 
- (base_clock * mul) >> 1 : - (base_clock * (mul - 1)) >> 2; - return product / divisor; - } - - if (mul == 16) - return base_clock / divisor; - - return base_clock * mul / divisor; -} - -static void tnetd7300_set_clock(u32 shift, struct tnetd7300_clock *clock, - u32 *bootcr, u32 frequency) -{ - int prediv, postdiv, mul; - int base_clock = bus_clk.rate; - - switch ((*bootcr & (BOOT_PLL_SOURCE_MASK << shift)) >> shift) { - case BOOT_PLL_SOURCE_BUS: - base_clock = bus_clk.rate; - break; - case BOOT_PLL_SOURCE_REF: - base_clock = AR7_REF_CLOCK; - break; - case BOOT_PLL_SOURCE_XTAL: - base_clock = AR7_XTAL_CLOCK; - break; - case BOOT_PLL_SOURCE_CPU: - base_clock = cpu_clk.rate; - break; - } - - calculate(base_clock, frequency, &prediv, &postdiv, &mul); - - writel(((prediv - 1) << PREDIV_SHIFT) | (postdiv - 1), &clock->ctrl); - mdelay(1); - writel(4, &clock->pll); - while (readl(&clock->pll) & PLL_STATUS) - ; - writel(((mul - 1) << MUL_SHIFT) | (0xff << 3) | 0x0e, &clock->pll); - mdelay(75); -} - -static void __init tnetd7300_init_clocks(void) -{ - u32 *bootcr = (u32 *)ioremap(AR7_REGS_DCL, 4); - struct tnetd7300_clocks *clocks = - ioremap(UR8_REGS_CLOCKS, - sizeof(struct tnetd7300_clocks)); - u32 dsp_clk; - struct clk *clk; - - bus_clk.rate = tnetd7300_get_clock(BUS_PLL_SOURCE_SHIFT, - &clocks->bus, bootcr, AR7_AFE_CLOCK); - - if (*bootcr & BOOT_PLL_ASYNC_MODE) - cpu_clk.rate = tnetd7300_get_clock(CPU_PLL_SOURCE_SHIFT, - &clocks->cpu, bootcr, AR7_AFE_CLOCK); - else - cpu_clk.rate = bus_clk.rate; - - dsp_clk = tnetd7300_dsp_clock(); - if (dsp_clk == 250000000) - tnetd7300_set_clock(DSP_PLL_SOURCE_SHIFT, &clocks->dsp, - bootcr, dsp_clk); - - iounmap(clocks); - iounmap(bootcr); - - clk = clk_register_fixed_rate(NULL, "cpu", NULL, 0, cpu_clk.rate); - clkdev_create(clk, "cpu", NULL); - clk = clk_register_fixed_rate(NULL, "dsp", NULL, 0, dsp_clk); - clkdev_create(clk, "dsp", NULL); -} - -static void tnetd7200_set_clock(int base, struct tnetd7200_clock *clock, - int prediv, int postdiv, int postdiv2, int mul, u32 frequency) -{ - printk(KERN_INFO - "Clocks: base = %d, frequency = %u, prediv = %d, " - "postdiv = %d, postdiv2 = %d, mul = %d\n", - base, frequency, prediv, postdiv, postdiv2, mul); - - writel(0, &clock->ctrl); - writel(DIVISOR_ENABLE_MASK | ((prediv - 1) & 0x1F), &clock->prediv); - writel((mul - 1) & 0xF, &clock->mul); - - while (readl(&clock->status) & 0x1) - ; /* nop */ - - writel(DIVISOR_ENABLE_MASK | ((postdiv - 1) & 0x1F), &clock->postdiv); - - writel(readl(&clock->cmden) | 1, &clock->cmden); - writel(readl(&clock->cmd) | 1, &clock->cmd); - - while (readl(&clock->status) & 0x1) - ; /* nop */ - - writel(DIVISOR_ENABLE_MASK | ((postdiv2 - 1) & 0x1F), &clock->postdiv2); - - writel(readl(&clock->cmden) | 1, &clock->cmden); - writel(readl(&clock->cmd) | 1, &clock->cmd); - - while (readl(&clock->status) & 0x1) - ; /* nop */ - - writel(readl(&clock->ctrl) | 1, &clock->ctrl); -} - -static int tnetd7200_get_clock_base(int clock_id, u32 *bootcr) -{ - if (*bootcr & BOOT_PLL_ASYNC_MODE) - /* Async */ - switch (clock_id) { - case TNETD7200_CLOCK_ID_DSP: - return AR7_REF_CLOCK; - default: - return AR7_AFE_CLOCK; - } - else - /* Sync */ - if (*bootcr & BOOT_PLL_2TO1_MODE) - /* 2:1 */ - switch (clock_id) { - case TNETD7200_CLOCK_ID_DSP: - return AR7_REF_CLOCK; - default: - return AR7_AFE_CLOCK; - } - else - /* 1:1 */ - return AR7_REF_CLOCK; -} - - -static void __init tnetd7200_init_clocks(void) -{ - u32 *bootcr = (u32 *)ioremap(AR7_REGS_DCL, 4); - struct tnetd7200_clocks *clocks = - 
ioremap(AR7_REGS_CLOCKS, - sizeof(struct tnetd7200_clocks)); - int cpu_base, cpu_mul, cpu_prediv, cpu_postdiv; - int dsp_base, dsp_mul, dsp_prediv, dsp_postdiv; - int usb_base, usb_mul, usb_prediv, usb_postdiv; - struct clk *clk; - - cpu_base = tnetd7200_get_clock_base(TNETD7200_CLOCK_ID_CPU, bootcr); - dsp_base = tnetd7200_get_clock_base(TNETD7200_CLOCK_ID_DSP, bootcr); - - if (*bootcr & BOOT_PLL_ASYNC_MODE) { - printk(KERN_INFO "Clocks: Async mode\n"); - - printk(KERN_INFO "Clocks: Setting DSP clock\n"); - calculate(dsp_base, TNETD7200_DEF_DSP_CLK, - &dsp_prediv, &dsp_postdiv, &dsp_mul); - bus_clk.rate = - ((dsp_base / dsp_prediv) * dsp_mul) / dsp_postdiv; - tnetd7200_set_clock(dsp_base, &clocks->dsp, - dsp_prediv, dsp_postdiv * 2, dsp_postdiv, dsp_mul * 2, - bus_clk.rate); - - printk(KERN_INFO "Clocks: Setting CPU clock\n"); - calculate(cpu_base, TNETD7200_DEF_CPU_CLK, &cpu_prediv, - &cpu_postdiv, &cpu_mul); - cpu_clk.rate = - ((cpu_base / cpu_prediv) * cpu_mul) / cpu_postdiv; - tnetd7200_set_clock(cpu_base, &clocks->cpu, - cpu_prediv, cpu_postdiv, -1, cpu_mul, - cpu_clk.rate); - - } else - if (*bootcr & BOOT_PLL_2TO1_MODE) { - printk(KERN_INFO "Clocks: Sync 2:1 mode\n"); - - printk(KERN_INFO "Clocks: Setting CPU clock\n"); - calculate(cpu_base, TNETD7200_DEF_CPU_CLK, &cpu_prediv, - &cpu_postdiv, &cpu_mul); - cpu_clk.rate = ((cpu_base / cpu_prediv) * cpu_mul) - / cpu_postdiv; - tnetd7200_set_clock(cpu_base, &clocks->cpu, - cpu_prediv, cpu_postdiv, -1, cpu_mul, - cpu_clk.rate); - - printk(KERN_INFO "Clocks: Setting DSP clock\n"); - calculate(dsp_base, TNETD7200_DEF_DSP_CLK, &dsp_prediv, - &dsp_postdiv, &dsp_mul); - bus_clk.rate = cpu_clk.rate / 2; - tnetd7200_set_clock(dsp_base, &clocks->dsp, - dsp_prediv, dsp_postdiv * 2, dsp_postdiv, - dsp_mul * 2, bus_clk.rate); - } else { - printk(KERN_INFO "Clocks: Sync 1:1 mode\n"); - - printk(KERN_INFO "Clocks: Setting DSP clock\n"); - calculate(dsp_base, TNETD7200_DEF_DSP_CLK, &dsp_prediv, - &dsp_postdiv, &dsp_mul); - bus_clk.rate = ((dsp_base / dsp_prediv) * dsp_mul) - / dsp_postdiv; - tnetd7200_set_clock(dsp_base, &clocks->dsp, - dsp_prediv, dsp_postdiv * 2, dsp_postdiv, - dsp_mul * 2, bus_clk.rate); - - cpu_clk.rate = bus_clk.rate; - } - - printk(KERN_INFO "Clocks: Setting USB clock\n"); - usb_base = bus_clk.rate; - calculate(usb_base, TNETD7200_DEF_USB_CLK, &usb_prediv, - &usb_postdiv, &usb_mul); - tnetd7200_set_clock(usb_base, &clocks->usb, - usb_prediv, usb_postdiv, -1, usb_mul, - TNETD7200_DEF_USB_CLK); - - iounmap(clocks); - iounmap(bootcr); - - clk = clk_register_fixed_rate(NULL, "cpu", NULL, 0, cpu_clk.rate); - clkdev_create(clk, "cpu", NULL); - clkdev_create(clk, "dsp", NULL); -} - -void __init ar7_init_clocks(void) -{ - struct clk *clk; - - switch (ar7_chip_id()) { - case AR7_CHIP_7100: - case AR7_CHIP_7200: - tnetd7200_init_clocks(); - break; - case AR7_CHIP_7300: - tnetd7300_init_clocks(); - break; - default: - break; - } - clk = clk_register_fixed_rate(NULL, "bus", NULL, 0, bus_clk.rate); - clkdev_create(clk, "bus", NULL); - /* adjust vbus clock rate */ - clk = clk_register_fixed_factor(NULL, "vbus", "bus", 0, 1, 2); - clkdev_create(clk, "vbus", NULL); - clkdev_create(clk, "cpmac", "cpmac.1"); - clkdev_create(clk, "cpmac", "cpmac.0"); -} diff --git a/arch/mips/ar7/gpio.c b/arch/mips/ar7/gpio.c deleted file mode 100644 index 4ed833b9cc2f..000000000000 --- a/arch/mips/ar7/gpio.c +++ /dev/null @@ -1,332 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2007 Felix Fietkau <nbd@openwrt.org> - * Copyright (C) 
2007 Eugene Konev <ejka@openwrt.org> - * Copyright (C) 2009-2010 Florian Fainelli <florian@openwrt.org> - */ - -#include <linux/init.h> -#include <linux/export.h> -#include <linux/gpio/driver.h> - -#include <asm/mach-ar7/ar7.h> - -#define AR7_GPIO_MAX 32 -#define TITAN_GPIO_MAX 51 - -struct ar7_gpio_chip { - void __iomem *regs; - struct gpio_chip chip; -}; - -static int ar7_gpio_get_value(struct gpio_chip *chip, unsigned gpio) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_in = gpch->regs + AR7_GPIO_INPUT; - - return !!(readl(gpio_in) & (1 << gpio)); -} - -static int titan_gpio_get_value(struct gpio_chip *chip, unsigned gpio) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_in0 = gpch->regs + TITAN_GPIO_INPUT_0; - void __iomem *gpio_in1 = gpch->regs + TITAN_GPIO_INPUT_1; - - return readl(gpio >> 5 ? gpio_in1 : gpio_in0) & (1 << (gpio & 0x1f)); -} - -static void ar7_gpio_set_value(struct gpio_chip *chip, - unsigned gpio, int value) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_out = gpch->regs + AR7_GPIO_OUTPUT; - unsigned tmp; - - tmp = readl(gpio_out) & ~(1 << gpio); - if (value) - tmp |= 1 << gpio; - writel(tmp, gpio_out); -} - -static void titan_gpio_set_value(struct gpio_chip *chip, - unsigned gpio, int value) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_out0 = gpch->regs + TITAN_GPIO_OUTPUT_0; - void __iomem *gpio_out1 = gpch->regs + TITAN_GPIO_OUTPUT_1; - unsigned tmp; - - tmp = readl(gpio >> 5 ? gpio_out1 : gpio_out0) & ~(1 << (gpio & 0x1f)); - if (value) - tmp |= 1 << (gpio & 0x1f); - writel(tmp, gpio >> 5 ? gpio_out1 : gpio_out0); -} - -static int ar7_gpio_direction_input(struct gpio_chip *chip, unsigned gpio) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_dir = gpch->regs + AR7_GPIO_DIR; - - writel(readl(gpio_dir) | (1 << gpio), gpio_dir); - - return 0; -} - -static int titan_gpio_direction_input(struct gpio_chip *chip, unsigned gpio) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_dir0 = gpch->regs + TITAN_GPIO_DIR_0; - void __iomem *gpio_dir1 = gpch->regs + TITAN_GPIO_DIR_1; - - if (gpio >= TITAN_GPIO_MAX) - return -EINVAL; - - writel(readl(gpio >> 5 ? gpio_dir1 : gpio_dir0) | (1 << (gpio & 0x1f)), - gpio >> 5 ? gpio_dir1 : gpio_dir0); - return 0; -} - -static int ar7_gpio_direction_output(struct gpio_chip *chip, - unsigned gpio, int value) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_dir = gpch->regs + AR7_GPIO_DIR; - - ar7_gpio_set_value(chip, gpio, value); - writel(readl(gpio_dir) & ~(1 << gpio), gpio_dir); - - return 0; -} - -static int titan_gpio_direction_output(struct gpio_chip *chip, - unsigned gpio, int value) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_dir0 = gpch->regs + TITAN_GPIO_DIR_0; - void __iomem *gpio_dir1 = gpch->regs + TITAN_GPIO_DIR_1; - - if (gpio >= TITAN_GPIO_MAX) - return -EINVAL; - - titan_gpio_set_value(chip, gpio, value); - writel(readl(gpio >> 5 ? gpio_dir1 : gpio_dir0) & ~(1 << - (gpio & 0x1f)), gpio >> 5 ? 
gpio_dir1 : gpio_dir0); - - return 0; -} - -static struct ar7_gpio_chip ar7_gpio_chip = { - .chip = { - .label = "ar7-gpio", - .direction_input = ar7_gpio_direction_input, - .direction_output = ar7_gpio_direction_output, - .set = ar7_gpio_set_value, - .get = ar7_gpio_get_value, - .base = 0, - .ngpio = AR7_GPIO_MAX, - } -}; - -static struct ar7_gpio_chip titan_gpio_chip = { - .chip = { - .label = "titan-gpio", - .direction_input = titan_gpio_direction_input, - .direction_output = titan_gpio_direction_output, - .set = titan_gpio_set_value, - .get = titan_gpio_get_value, - .base = 0, - .ngpio = TITAN_GPIO_MAX, - } -}; - -static inline int ar7_gpio_enable_ar7(unsigned gpio) -{ - void __iomem *gpio_en = ar7_gpio_chip.regs + AR7_GPIO_ENABLE; - - writel(readl(gpio_en) | (1 << gpio), gpio_en); - - return 0; -} - -static inline int ar7_gpio_enable_titan(unsigned gpio) -{ - void __iomem *gpio_en0 = titan_gpio_chip.regs + TITAN_GPIO_ENBL_0; - void __iomem *gpio_en1 = titan_gpio_chip.regs + TITAN_GPIO_ENBL_1; - - writel(readl(gpio >> 5 ? gpio_en1 : gpio_en0) | (1 << (gpio & 0x1f)), - gpio >> 5 ? gpio_en1 : gpio_en0); - - return 0; -} - -int ar7_gpio_enable(unsigned gpio) -{ - return ar7_is_titan() ? ar7_gpio_enable_titan(gpio) : - ar7_gpio_enable_ar7(gpio); -} -EXPORT_SYMBOL(ar7_gpio_enable); - -static inline int ar7_gpio_disable_ar7(unsigned gpio) -{ - void __iomem *gpio_en = ar7_gpio_chip.regs + AR7_GPIO_ENABLE; - - writel(readl(gpio_en) & ~(1 << gpio), gpio_en); - - return 0; -} - -static inline int ar7_gpio_disable_titan(unsigned gpio) -{ - void __iomem *gpio_en0 = titan_gpio_chip.regs + TITAN_GPIO_ENBL_0; - void __iomem *gpio_en1 = titan_gpio_chip.regs + TITAN_GPIO_ENBL_1; - - writel(readl(gpio >> 5 ? gpio_en1 : gpio_en0) & ~(1 << (gpio & 0x1f)), - gpio >> 5 ? gpio_en1 : gpio_en0); - - return 0; -} - -int ar7_gpio_disable(unsigned gpio) -{ - return ar7_is_titan() ? 
ar7_gpio_disable_titan(gpio) : - ar7_gpio_disable_ar7(gpio); -} -EXPORT_SYMBOL(ar7_gpio_disable); - -struct titan_gpio_cfg { - u32 reg; - u32 shift; - u32 func; -}; - -static const struct titan_gpio_cfg titan_gpio_table[] = { - /* reg, start bit, mux value */ - {4, 24, 1}, - {4, 26, 1}, - {4, 28, 1}, - {4, 30, 1}, - {5, 6, 1}, - {5, 8, 1}, - {5, 10, 1}, - {5, 12, 1}, - {7, 14, 3}, - {7, 16, 3}, - {7, 18, 3}, - {7, 20, 3}, - {7, 22, 3}, - {7, 26, 3}, - {7, 28, 3}, - {7, 30, 3}, - {8, 0, 3}, - {8, 2, 3}, - {8, 4, 3}, - {8, 10, 3}, - {8, 14, 3}, - {8, 16, 3}, - {8, 18, 3}, - {8, 20, 3}, - {9, 8, 3}, - {9, 10, 3}, - {9, 12, 3}, - {9, 14, 3}, - {9, 18, 3}, - {9, 20, 3}, - {9, 24, 3}, - {9, 26, 3}, - {9, 28, 3}, - {9, 30, 3}, - {10, 0, 3}, - {10, 2, 3}, - {10, 8, 3}, - {10, 10, 3}, - {10, 12, 3}, - {10, 14, 3}, - {13, 12, 3}, - {13, 14, 3}, - {13, 16, 3}, - {13, 18, 3}, - {13, 24, 3}, - {13, 26, 3}, - {13, 28, 3}, - {13, 30, 3}, - {14, 2, 3}, - {14, 6, 3}, - {14, 8, 3}, - {14, 12, 3} -}; - -static int titan_gpio_pinsel(unsigned gpio) -{ - struct titan_gpio_cfg gpio_cfg; - u32 mux_status, pin_sel_reg, tmp; - void __iomem *pin_sel = (void __iomem *)KSEG1ADDR(AR7_REGS_PINSEL); - - if (gpio >= ARRAY_SIZE(titan_gpio_table)) - return -EINVAL; - - gpio_cfg = titan_gpio_table[gpio]; - pin_sel_reg = gpio_cfg.reg - 1; - - mux_status = (readl(pin_sel + pin_sel_reg) >> gpio_cfg.shift) & 0x3; - - /* Check the mux status */ - if (!((mux_status == 0) || (mux_status == gpio_cfg.func))) - return 0; - - /* Set the pin sel value */ - tmp = readl(pin_sel + pin_sel_reg); - tmp |= ((gpio_cfg.func & 0x3) << gpio_cfg.shift); - writel(tmp, pin_sel + pin_sel_reg); - - return 0; -} - -/* Perform minimal Titan GPIO configuration */ -static void titan_gpio_init(void) -{ - unsigned i; - - for (i = 44; i < 48; i++) { - titan_gpio_pinsel(i); - ar7_gpio_enable_titan(i); - titan_gpio_direction_input(&titan_gpio_chip.chip, i); - } -} - -int __init ar7_gpio_init(void) -{ - int ret; - struct ar7_gpio_chip *gpch; - unsigned size; - - if (!ar7_is_titan()) { - gpch = &ar7_gpio_chip; - size = 0x10; - } else { - gpch = &titan_gpio_chip; - size = 0x1f; - } - - gpch->regs = ioremap(AR7_REGS_GPIO, size); - if (!gpch->regs) { - printk(KERN_ERR "%s: failed to ioremap regs\n", - gpch->chip.label); - return -ENOMEM; - } - - ret = gpiochip_add_data(&gpch->chip, gpch); - if (ret) { - printk(KERN_ERR "%s: failed to add gpiochip\n", - gpch->chip.label); - iounmap(gpch->regs); - return ret; - } - printk(KERN_INFO "%s: registered %d GPIOs\n", - gpch->chip.label, gpch->chip.ngpio); - - if (ar7_is_titan()) - titan_gpio_init(); - - return ret; -} diff --git a/arch/mips/ar7/irq.c b/arch/mips/ar7/irq.c deleted file mode 100644 index f0a7942d393e..000000000000 --- a/arch/mips/ar7/irq.c +++ /dev/null @@ -1,165 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2006,2007 Felix Fietkau <nbd@openwrt.org> - * Copyright (C) 2006,2007 Eugene Konev <ejka@openwrt.org> - */ - -#include <linux/interrupt.h> -#include <linux/io.h> -#include <linux/irq.h> - -#include <asm/irq_cpu.h> -#include <asm/mipsregs.h> -#include <asm/mach-ar7/ar7.h> - -#define EXCEPT_OFFSET 0x80 -#define PACE_OFFSET 0xA0 -#define CHNLS_OFFSET 0x200 - -#define REG_OFFSET(irq, reg) ((irq) / 32 * 0x4 + reg * 0x10) -#define SEC_REG_OFFSET(reg) (EXCEPT_OFFSET + reg * 0x8) -#define SEC_SR_OFFSET (SEC_REG_OFFSET(0)) /* 0x80 */ -#define CR_OFFSET(irq) (REG_OFFSET(irq, 1)) /* 0x10 */ -#define SEC_CR_OFFSET (SEC_REG_OFFSET(1)) /* 0x88 */ -#define ESR_OFFSET(irq) (REG_OFFSET(irq, 
2)) /* 0x20 */ -#define SEC_ESR_OFFSET (SEC_REG_OFFSET(2)) /* 0x90 */ -#define ECR_OFFSET(irq) (REG_OFFSET(irq, 3)) /* 0x30 */ -#define SEC_ECR_OFFSET (SEC_REG_OFFSET(3)) /* 0x98 */ -#define PIR_OFFSET (0x40) -#define MSR_OFFSET (0x44) -#define PM_OFFSET(irq) (REG_OFFSET(irq, 5)) /* 0x50 */ -#define TM_OFFSET(irq) (REG_OFFSET(irq, 6)) /* 0x60 */ - -#define REG(addr) ((u32 *)(KSEG1ADDR(AR7_REGS_IRQ) + addr)) - -#define CHNL_OFFSET(chnl) (CHNLS_OFFSET + (chnl * 4)) - -static int ar7_irq_base; - -static void ar7_unmask_irq(struct irq_data *d) -{ - writel(1 << ((d->irq - ar7_irq_base) % 32), - REG(ESR_OFFSET(d->irq - ar7_irq_base))); -} - -static void ar7_mask_irq(struct irq_data *d) -{ - writel(1 << ((d->irq - ar7_irq_base) % 32), - REG(ECR_OFFSET(d->irq - ar7_irq_base))); -} - -static void ar7_ack_irq(struct irq_data *d) -{ - writel(1 << ((d->irq - ar7_irq_base) % 32), - REG(CR_OFFSET(d->irq - ar7_irq_base))); -} - -static void ar7_unmask_sec_irq(struct irq_data *d) -{ - writel(1 << (d->irq - ar7_irq_base - 40), REG(SEC_ESR_OFFSET)); -} - -static void ar7_mask_sec_irq(struct irq_data *d) -{ - writel(1 << (d->irq - ar7_irq_base - 40), REG(SEC_ECR_OFFSET)); -} - -static void ar7_ack_sec_irq(struct irq_data *d) -{ - writel(1 << (d->irq - ar7_irq_base - 40), REG(SEC_CR_OFFSET)); -} - -static struct irq_chip ar7_irq_type = { - .name = "AR7", - .irq_unmask = ar7_unmask_irq, - .irq_mask = ar7_mask_irq, - .irq_ack = ar7_ack_irq -}; - -static struct irq_chip ar7_sec_irq_type = { - .name = "AR7", - .irq_unmask = ar7_unmask_sec_irq, - .irq_mask = ar7_mask_sec_irq, - .irq_ack = ar7_ack_sec_irq, -}; - -static void __init ar7_irq_init(int base) -{ - int i; - /* - * Disable interrupts and clear pending - */ - writel(0xffffffff, REG(ECR_OFFSET(0))); - writel(0xff, REG(ECR_OFFSET(32))); - writel(0xffffffff, REG(SEC_ECR_OFFSET)); - writel(0xffffffff, REG(CR_OFFSET(0))); - writel(0xff, REG(CR_OFFSET(32))); - writel(0xffffffff, REG(SEC_CR_OFFSET)); - - ar7_irq_base = base; - - for (i = 0; i < 40; i++) { - writel(i, REG(CHNL_OFFSET(i))); - /* Primary IRQ's */ - irq_set_chip_and_handler(base + i, &ar7_irq_type, - handle_level_irq); - /* Secondary IRQ's */ - if (i < 32) - irq_set_chip_and_handler(base + i + 40, - &ar7_sec_irq_type, - handle_level_irq); - } - - if (request_irq(2, no_action, IRQF_NO_THREAD, "AR7 cascade interrupt", - NULL)) - pr_err("Failed to request irq 2 (AR7 cascade interrupt)\n"); - if (request_irq(ar7_irq_base, no_action, IRQF_NO_THREAD, - "AR7 cascade interrupt", NULL)) { - pr_err("Failed to request irq %d (AR7 cascade interrupt)\n", - ar7_irq_base); - } - set_c0_status(IE_IRQ0); -} - -void __init arch_init_irq(void) -{ - mips_cpu_irq_init(); - ar7_irq_init(8); -} - -static void ar7_cascade(void) -{ - u32 status; - int i, irq; - - /* Primary IRQ's */ - irq = readl(REG(PIR_OFFSET)) & 0x3f; - if (irq) { - do_IRQ(ar7_irq_base + irq); - return; - } - - /* Secondary IRQ's are cascaded through primary '0' */ - writel(1, REG(CR_OFFSET(irq))); - status = readl(REG(SEC_SR_OFFSET)); - for (i = 0; i < 32; i++) { - if (status & 1) { - do_IRQ(ar7_irq_base + i + 40); - return; - } - status >>= 1; - } - - spurious_interrupt(); -} - -asmlinkage void plat_irq_dispatch(void) -{ - unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM; - if (pending & STATUSF_IP7) /* cpu timer */ - do_IRQ(7); - else if (pending & STATUSF_IP2) /* int0 hardware line */ - ar7_cascade(); - else - spurious_interrupt(); -} diff --git a/arch/mips/ar7/memory.c b/arch/mips/ar7/memory.c deleted file mode 100644 index 
ce8024c1a54e..000000000000 --- a/arch/mips/ar7/memory.c +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2007 Felix Fietkau <nbd@openwrt.org> - * Copyright (C) 2007 Eugene Konev <ejka@openwrt.org> - */ -#include <linux/memblock.h> -#include <linux/init.h> -#include <linux/mm.h> -#include <linux/pfn.h> -#include <linux/proc_fs.h> -#include <linux/string.h> -#include <linux/swap.h> - -#include <asm/bootinfo.h> -#include <asm/page.h> -#include <asm/sections.h> - -#include <asm/mach-ar7/ar7.h> - -static int __init memsize(void) -{ - u32 size = (64 << 20); - u32 *addr = (u32 *)KSEG1ADDR(AR7_SDRAM_BASE + size - 4); - u32 *kernel_end = (u32 *)KSEG1ADDR(CPHYSADDR((u32)&_end)); - u32 *tmpaddr = addr; - - while (tmpaddr > kernel_end) { - *tmpaddr = (u32)tmpaddr; - size >>= 1; - tmpaddr -= size >> 2; - } - - do { - tmpaddr += size >> 2; - if (*tmpaddr != (u32)tmpaddr) - break; - size <<= 1; - } while (size < (64 << 20)); - - writel((u32)tmpaddr, &addr); - - return size; -} - -void __init prom_meminit(void) -{ - unsigned long pages; - - pages = memsize() >> PAGE_SHIFT; - memblock_add(PHYS_OFFSET, pages << PAGE_SHIFT); -} diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c deleted file mode 100644 index 215149a85d83..000000000000 --- a/arch/mips/ar7/platform.c +++ /dev/null @@ -1,722 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2006,2007 Felix Fietkau <nbd@openwrt.org> - * Copyright (C) 2006,2007 Eugene Konev <ejka@openwrt.org> - */ - -#include <linux/init.h> -#include <linux/types.h> -#include <linux/delay.h> -#include <linux/dma-mapping.h> -#include <linux/platform_device.h> -#include <linux/mtd/physmap.h> -#include <linux/serial.h> -#include <linux/serial_8250.h> -#include <linux/ioport.h> -#include <linux/io.h> -#include <linux/vlynq.h> -#include <linux/leds.h> -#include <linux/string.h> -#include <linux/etherdevice.h> -#include <linux/phy.h> -#include <linux/phy_fixed.h> -#include <linux/gpio.h> -#include <linux/clk.h> - -#include <asm/addrspace.h> -#include <asm/mach-ar7/ar7.h> -#include <asm/mach-ar7/prom.h> - -/***************************************************************************** - * VLYNQ Bus - ****************************************************************************/ -struct plat_vlynq_data { - struct plat_vlynq_ops ops; - int gpio_bit; - int reset_bit; -}; - -static int vlynq_on(struct vlynq_device *dev) -{ - int ret; - struct plat_vlynq_data *pdata = dev->dev.platform_data; - - ret = gpio_request(pdata->gpio_bit, "vlynq"); - if (ret) - goto out; - - ar7_device_reset(pdata->reset_bit); - - ret = ar7_gpio_disable(pdata->gpio_bit); - if (ret) - goto out_enabled; - - ret = ar7_gpio_enable(pdata->gpio_bit); - if (ret) - goto out_enabled; - - ret = gpio_direction_output(pdata->gpio_bit, 0); - if (ret) - goto out_gpio_enabled; - - msleep(50); - - gpio_set_value(pdata->gpio_bit, 1); - - msleep(50); - - return 0; - -out_gpio_enabled: - ar7_gpio_disable(pdata->gpio_bit); -out_enabled: - ar7_device_disable(pdata->reset_bit); - gpio_free(pdata->gpio_bit); -out: - return ret; -} - -static void vlynq_off(struct vlynq_device *dev) -{ - struct plat_vlynq_data *pdata = dev->dev.platform_data; - - ar7_gpio_disable(pdata->gpio_bit); - gpio_free(pdata->gpio_bit); - ar7_device_disable(pdata->reset_bit); -} - -static struct resource vlynq_low_res[] = { - { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = AR7_REGS_VLYNQ0, - .end = AR7_REGS_VLYNQ0 + 0xff, - }, - { - .name = "irq", - .flags = 
IORESOURCE_IRQ, - .start = 29, - .end = 29, - }, - { - .name = "mem", - .flags = IORESOURCE_MEM, - .start = 0x04000000, - .end = 0x04ffffff, - }, - { - .name = "devirq", - .flags = IORESOURCE_IRQ, - .start = 80, - .end = 111, - }, -}; - -static struct resource vlynq_high_res[] = { - { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = AR7_REGS_VLYNQ1, - .end = AR7_REGS_VLYNQ1 + 0xff, - }, - { - .name = "irq", - .flags = IORESOURCE_IRQ, - .start = 33, - .end = 33, - }, - { - .name = "mem", - .flags = IORESOURCE_MEM, - .start = 0x0c000000, - .end = 0x0cffffff, - }, - { - .name = "devirq", - .flags = IORESOURCE_IRQ, - .start = 112, - .end = 143, - }, -}; - -static struct plat_vlynq_data vlynq_low_data = { - .ops = { - .on = vlynq_on, - .off = vlynq_off, - }, - .reset_bit = 20, - .gpio_bit = 18, -}; - -static struct plat_vlynq_data vlynq_high_data = { - .ops = { - .on = vlynq_on, - .off = vlynq_off, - }, - .reset_bit = 16, - .gpio_bit = 19, -}; - -static struct platform_device vlynq_low = { - .id = 0, - .name = "vlynq", - .dev = { - .platform_data = &vlynq_low_data, - }, - .resource = vlynq_low_res, - .num_resources = ARRAY_SIZE(vlynq_low_res), -}; - -static struct platform_device vlynq_high = { - .id = 1, - .name = "vlynq", - .dev = { - .platform_data = &vlynq_high_data, - }, - .resource = vlynq_high_res, - .num_resources = ARRAY_SIZE(vlynq_high_res), -}; - -/***************************************************************************** - * Flash - ****************************************************************************/ -static struct resource physmap_flash_resource = { - .name = "mem", - .flags = IORESOURCE_MEM, - .start = 0x10000000, - .end = 0x107fffff, -}; - -static const char *ar7_probe_types[] = { "ar7part", NULL }; - -static struct physmap_flash_data physmap_flash_data = { - .width = 2, - .part_probe_types = ar7_probe_types, -}; - -static struct platform_device physmap_flash = { - .name = "physmap-flash", - .dev = { - .platform_data = &physmap_flash_data, - }, - .resource = &physmap_flash_resource, - .num_resources = 1, -}; - -/***************************************************************************** - * Ethernet - ****************************************************************************/ -static struct resource cpmac_low_res[] = { - { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = AR7_REGS_MAC0, - .end = AR7_REGS_MAC0 + 0x7ff, - }, - { - .name = "irq", - .flags = IORESOURCE_IRQ, - .start = 27, - .end = 27, - }, -}; - -static struct resource cpmac_high_res[] = { - { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = AR7_REGS_MAC1, - .end = AR7_REGS_MAC1 + 0x7ff, - }, - { - .name = "irq", - .flags = IORESOURCE_IRQ, - .start = 41, - .end = 41, - }, -}; - -static struct fixed_phy_status fixed_phy_status __initdata = { - .link = 1, - .speed = 100, - .duplex = 1, -}; - -static struct plat_cpmac_data cpmac_low_data = { - .reset_bit = 17, - .power_bit = 20, - .phy_mask = 0x80000000, -}; - -static struct plat_cpmac_data cpmac_high_data = { - .reset_bit = 21, - .power_bit = 22, - .phy_mask = 0x7fffffff, -}; - -static u64 cpmac_dma_mask = DMA_BIT_MASK(32); - -static struct platform_device cpmac_low = { - .id = 0, - .name = "cpmac", - .dev = { - .dma_mask = &cpmac_dma_mask, - .coherent_dma_mask = DMA_BIT_MASK(32), - .platform_data = &cpmac_low_data, - }, - .resource = cpmac_low_res, - .num_resources = ARRAY_SIZE(cpmac_low_res), -}; - -static struct platform_device cpmac_high = { - .id = 1, - .name = "cpmac", - .dev = { - .dma_mask = &cpmac_dma_mask, - 
.coherent_dma_mask = DMA_BIT_MASK(32), - .platform_data = &cpmac_high_data, - }, - .resource = cpmac_high_res, - .num_resources = ARRAY_SIZE(cpmac_high_res), -}; - -static void __init cpmac_get_mac(int instance, unsigned char *dev_addr) -{ - char name[5], *mac; - - sprintf(name, "mac%c", 'a' + instance); - mac = prom_getenv(name); - if (!mac && instance) { - sprintf(name, "mac%c", 'a'); - mac = prom_getenv(name); - } - - if (mac) { - if (!mac_pton(mac, dev_addr)) { - pr_warn("cannot parse mac address, using random address\n"); - eth_random_addr(dev_addr); - } - } else - eth_random_addr(dev_addr); -} - -/***************************************************************************** - * USB - ****************************************************************************/ -static struct resource usb_res[] = { - { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = AR7_REGS_USB, - .end = AR7_REGS_USB + 0xff, - }, - { - .name = "irq", - .flags = IORESOURCE_IRQ, - .start = 32, - .end = 32, - }, - { - .name = "mem", - .flags = IORESOURCE_MEM, - .start = 0x03400000, - .end = 0x03401fff, - }, -}; - -static struct platform_device ar7_udc = { - .name = "ar7_udc", - .resource = usb_res, - .num_resources = ARRAY_SIZE(usb_res), -}; - -/***************************************************************************** - * LEDs - ****************************************************************************/ -static const struct gpio_led default_leds[] = { - { - .name = "status", - .gpio = 8, - .active_low = 1, - }, -}; - -static const struct gpio_led titan_leds[] = { - { .name = "status", .gpio = 8, .active_low = 1, }, - { .name = "wifi", .gpio = 13, .active_low = 1, }, -}; - -static const struct gpio_led dsl502t_leds[] = { - { - .name = "status", - .gpio = 9, - .active_low = 1, - }, - { - .name = "ethernet", - .gpio = 7, - .active_low = 1, - }, - { - .name = "usb", - .gpio = 12, - .active_low = 1, - }, -}; - -static const struct gpio_led dg834g_leds[] = { - { - .name = "ppp", - .gpio = 6, - .active_low = 1, - }, - { - .name = "status", - .gpio = 7, - .active_low = 1, - }, - { - .name = "adsl", - .gpio = 8, - .active_low = 1, - }, - { - .name = "wifi", - .gpio = 12, - .active_low = 1, - }, - { - .name = "power", - .gpio = 14, - .active_low = 1, - .default_trigger = "default-on", - }, -}; - -static const struct gpio_led fb_sl_leds[] = { - { - .name = "1", - .gpio = 7, - }, - { - .name = "2", - .gpio = 13, - .active_low = 1, - }, - { - .name = "3", - .gpio = 10, - .active_low = 1, - }, - { - .name = "4", - .gpio = 12, - .active_low = 1, - }, - { - .name = "5", - .gpio = 9, - .active_low = 1, - }, -}; - -static const struct gpio_led fb_fon_leds[] = { - { - .name = "1", - .gpio = 8, - }, - { - .name = "2", - .gpio = 3, - .active_low = 1, - }, - { - .name = "3", - .gpio = 5, - }, - { - .name = "4", - .gpio = 4, - .active_low = 1, - }, - { - .name = "5", - .gpio = 11, - .active_low = 1, - }, -}; - -static const struct gpio_led gt701_leds[] = { - { - .name = "inet:green", - .gpio = 13, - .active_low = 1, - }, - { - .name = "usb", - .gpio = 12, - .active_low = 1, - }, - { - .name = "inet:red", - .gpio = 9, - .active_low = 1, - }, - { - .name = "power:red", - .gpio = 7, - .active_low = 1, - }, - { - .name = "power:green", - .gpio = 8, - .active_low = 1, - .default_trigger = "default-on", - }, - { - .name = "ethernet", - .gpio = 10, - .active_low = 1, - }, -}; - -static struct gpio_led_platform_data ar7_led_data; - -static struct platform_device ar7_gpio_leds = { - .name = "leds-gpio", - .dev = { - .platform_data = 
&ar7_led_data, - } -}; - -static void __init detect_leds(void) -{ - char *prid, *usb_prod; - - /* Default LEDs */ - ar7_led_data.num_leds = ARRAY_SIZE(default_leds); - ar7_led_data.leds = default_leds; - - /* FIXME: the whole thing is unreliable */ - prid = prom_getenv("ProductID"); - usb_prod = prom_getenv("usb_prod"); - - /* If we can't get the product id from PROM, use the default LEDs */ - if (!prid) - return; - - if (strstr(prid, "Fritz_Box_FON")) { - ar7_led_data.num_leds = ARRAY_SIZE(fb_fon_leds); - ar7_led_data.leds = fb_fon_leds; - } else if (strstr(prid, "Fritz_Box_")) { - ar7_led_data.num_leds = ARRAY_SIZE(fb_sl_leds); - ar7_led_data.leds = fb_sl_leds; - } else if ((!strcmp(prid, "AR7RD") || !strcmp(prid, "AR7DB")) - && usb_prod != NULL && strstr(usb_prod, "DSL-502T")) { - ar7_led_data.num_leds = ARRAY_SIZE(dsl502t_leds); - ar7_led_data.leds = dsl502t_leds; - } else if (strstr(prid, "DG834")) { - ar7_led_data.num_leds = ARRAY_SIZE(dg834g_leds); - ar7_led_data.leds = dg834g_leds; - } else if (strstr(prid, "CYWM") || strstr(prid, "CYWL")) { - ar7_led_data.num_leds = ARRAY_SIZE(titan_leds); - ar7_led_data.leds = titan_leds; - } else if (strstr(prid, "GT701")) { - ar7_led_data.num_leds = ARRAY_SIZE(gt701_leds); - ar7_led_data.leds = gt701_leds; - } -} - -/***************************************************************************** - * Watchdog - ****************************************************************************/ -static struct resource ar7_wdt_res = { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = -1, /* Filled at runtime */ - .end = -1, /* Filled at runtime */ -}; - -static struct platform_device ar7_wdt = { - .name = "ar7_wdt", - .resource = &ar7_wdt_res, - .num_resources = 1, -}; - -/***************************************************************************** - * Init - ****************************************************************************/ -static int __init ar7_register_uarts(void) -{ -#ifdef CONFIG_SERIAL_8250 - static struct uart_port uart_port __initdata; - struct clk *bus_clk; - int res; - - memset(&uart_port, 0, sizeof(struct uart_port)); - - bus_clk = clk_get(NULL, "bus"); - if (IS_ERR(bus_clk)) - panic("unable to get bus clk"); - - uart_port.type = PORT_AR7; - uart_port.uartclk = clk_get_rate(bus_clk) / 2; - uart_port.iotype = UPIO_MEM32; - uart_port.flags = UPF_FIXED_TYPE | UPF_BOOT_AUTOCONF; - uart_port.regshift = 2; - - uart_port.line = 0; - uart_port.irq = AR7_IRQ_UART0; - uart_port.mapbase = AR7_REGS_UART0; - uart_port.membase = ioremap(uart_port.mapbase, 256); - - res = early_serial_setup(&uart_port); - if (res) - return res; - - /* Only TNETD73xx have a second serial port */ - if (ar7_has_second_uart()) { - uart_port.line = 1; - uart_port.irq = AR7_IRQ_UART1; - uart_port.mapbase = UR8_REGS_UART1; - uart_port.membase = ioremap(uart_port.mapbase, 256); - - res = early_serial_setup(&uart_port); - if (res) - return res; - } -#endif - - return 0; -} - -static void __init titan_fixup_devices(void) -{ - /* Set vlynq0 data */ - vlynq_low_data.reset_bit = 15; - vlynq_low_data.gpio_bit = 14; - - /* Set vlynq1 data */ - vlynq_high_data.reset_bit = 16; - vlynq_high_data.gpio_bit = 7; - - /* Set vlynq0 resources */ - vlynq_low_res[0].start = TITAN_REGS_VLYNQ0; - vlynq_low_res[0].end = TITAN_REGS_VLYNQ0 + 0xff; - vlynq_low_res[1].start = 33; - vlynq_low_res[1].end = 33; - vlynq_low_res[2].start = 0x0c000000; - vlynq_low_res[2].end = 0x0fffffff; - vlynq_low_res[3].start = 80; - vlynq_low_res[3].end = 111; - - /* Set vlynq1 resources */ - 
vlynq_high_res[0].start = TITAN_REGS_VLYNQ1; - vlynq_high_res[0].end = TITAN_REGS_VLYNQ1 + 0xff; - vlynq_high_res[1].start = 34; - vlynq_high_res[1].end = 34; - vlynq_high_res[2].start = 0x40000000; - vlynq_high_res[2].end = 0x43ffffff; - vlynq_high_res[3].start = 112; - vlynq_high_res[3].end = 143; - - /* Set cpmac0 data */ - cpmac_low_data.phy_mask = 0x40000000; - - /* Set cpmac1 data */ - cpmac_high_data.phy_mask = 0x80000000; - - /* Set cpmac0 resources */ - cpmac_low_res[0].start = TITAN_REGS_MAC0; - cpmac_low_res[0].end = TITAN_REGS_MAC0 + 0x7ff; - - /* Set cpmac1 resources */ - cpmac_high_res[0].start = TITAN_REGS_MAC1; - cpmac_high_res[0].end = TITAN_REGS_MAC1 + 0x7ff; -} - -static int __init ar7_register_devices(void) -{ - void __iomem *bootcr; - u32 val; - int res; - - res = ar7_gpio_init(); - if (res) - pr_warn("unable to register gpios: %d\n", res); - - res = ar7_register_uarts(); - if (res) - pr_err("unable to setup uart(s): %d\n", res); - - res = platform_device_register(&physmap_flash); - if (res) - pr_warn("unable to register physmap-flash: %d\n", res); - - if (ar7_is_titan()) - titan_fixup_devices(); - - ar7_device_disable(vlynq_low_data.reset_bit); - res = platform_device_register(&vlynq_low); - if (res) - pr_warn("unable to register vlynq-low: %d\n", res); - - if (ar7_has_high_vlynq()) { - ar7_device_disable(vlynq_high_data.reset_bit); - res = platform_device_register(&vlynq_high); - if (res) - pr_warn("unable to register vlynq-high: %d\n", res); - } - - if (ar7_has_high_cpmac()) { - res = fixed_phy_add(PHY_POLL, cpmac_high.id, - &fixed_phy_status); - if (!res) { - cpmac_get_mac(1, cpmac_high_data.dev_addr); - - res = platform_device_register(&cpmac_high); - if (res) - pr_warn("unable to register cpmac-high: %d\n", - res); - } else - pr_warn("unable to add cpmac-high phy: %d\n", res); - } else - cpmac_low_data.phy_mask = 0xffffffff; - - res = fixed_phy_add(PHY_POLL, cpmac_low.id, &fixed_phy_status); - if (!res) { - cpmac_get_mac(0, cpmac_low_data.dev_addr); - res = platform_device_register(&cpmac_low); - if (res) - pr_warn("unable to register cpmac-low: %d\n", res); - } else - pr_warn("unable to add cpmac-low phy: %d\n", res); - - detect_leds(); - res = platform_device_register(&ar7_gpio_leds); - if (res) - pr_warn("unable to register leds: %d\n", res); - - res = platform_device_register(&ar7_udc); - if (res) - pr_warn("unable to register usb slave: %d\n", res); - - /* Register watchdog only if enabled in hardware */ - bootcr = ioremap(AR7_REGS_DCL, 4); - val = readl(bootcr); - iounmap(bootcr); - if (val & AR7_WDT_HW_ENA) { - if (ar7_has_high_vlynq()) - ar7_wdt_res.start = UR8_REGS_WDT; - else - ar7_wdt_res.start = AR7_REGS_WDT; - - ar7_wdt_res.end = ar7_wdt_res.start + 0x20; - res = platform_device_register(&ar7_wdt); - if (res) - pr_warn("unable to register watchdog: %d\n", res); - } - - return 0; -} -device_initcall(ar7_register_devices); diff --git a/arch/mips/ar7/prom.c b/arch/mips/ar7/prom.c deleted file mode 100644 index 5810d3993fc6..000000000000 --- a/arch/mips/ar7/prom.c +++ /dev/null @@ -1,256 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. - * - * Putting things on the screen/serial line using YAMONs facilities. 
- */ -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/serial_reg.h> -#include <linux/spinlock.h> -#include <linux/export.h> -#include <linux/string.h> -#include <linux/io.h> -#include <asm/bootinfo.h> -#include <asm/setup.h> - -#include <asm/mach-ar7/ar7.h> -#include <asm/mach-ar7/prom.h> - -#define MAX_ENTRY 80 - -struct env_var { - char *name; - char *value; -}; - -static struct env_var adam2_env[MAX_ENTRY]; - -char *prom_getenv(const char *name) -{ - int i; - - for (i = 0; (i < MAX_ENTRY) && adam2_env[i].name; i++) - if (!strcmp(name, adam2_env[i].name)) - return adam2_env[i].value; - - return NULL; -} -EXPORT_SYMBOL(prom_getenv); - -static void __init ar7_init_cmdline(int argc, char *argv[]) -{ - int i; - - for (i = 1; i < argc; i++) { - strlcat(arcs_cmdline, argv[i], COMMAND_LINE_SIZE); - if (i < (argc - 1)) - strlcat(arcs_cmdline, " ", COMMAND_LINE_SIZE); - } -} - -struct psbl_rec { - u32 psbl_size; - u32 env_base; - u32 env_size; - u32 ffs_base; - u32 ffs_size; -}; - -static const char psp_env_version[] __initconst = "TIENV0.8"; - -struct psp_env_chunk { - u8 num; - u8 ctrl; - u16 csum; - u8 len; - char data[11]; -} __packed; - -struct psp_var_map_entry { - u8 num; - char *value; -}; - -static const struct psp_var_map_entry psp_var_map[] = { - { 1, "cpufrequency" }, - { 2, "memsize" }, - { 3, "flashsize" }, - { 4, "modetty0" }, - { 5, "modetty1" }, - { 8, "maca" }, - { 9, "macb" }, - { 28, "sysfrequency" }, - { 38, "mipsfrequency" }, -}; - -/* - -Well-known variable (num is looked up in table above for matching variable name) -Example: cpufrequency=211968000 -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- -| 01 |CTRL|CHECKSUM | 01 | _2 | _1 | _1 | _9 | _6 | _8 | _0 | _0 | _0 | \0 | FF -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- - -Name=Value pair in a single chunk -Example: NAME=VALUE -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- -| 00 |CTRL|CHECKSUM | 01 | _N | _A | _M | _E | _0 | _V | _A | _L | _U | _E | \0 -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- - -Name=Value pair in 2 chunks (len is the number of chunks) -Example: bootloaderVersion=1.3.7.15 -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- -| 00 |CTRL|CHECKSUM | 02 | _b | _o | _o | _t | _l | _o | _a | _d | _e | _r | _V -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- -| _e | _r | _s | _i | _o | _n | \0 | _1 | _. | _3 | _. | _7 | _. 
| _1 | _5 | \0 -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- - -Data is padded with 0xFF - -*/ - -#define PSP_ENV_SIZE 4096 - -static char psp_env_data[PSP_ENV_SIZE] = { 0, }; - -static char * __init lookup_psp_var_map(u8 num) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(psp_var_map); i++) - if (psp_var_map[i].num == num) - return psp_var_map[i].value; - - return NULL; -} - -static void __init add_adam2_var(char *name, char *value) -{ - int i; - - for (i = 0; i < MAX_ENTRY; i++) { - if (!adam2_env[i].name) { - adam2_env[i].name = name; - adam2_env[i].value = value; - return; - } else if (!strcmp(adam2_env[i].name, name)) { - adam2_env[i].value = value; - return; - } - } -} - -static int __init parse_psp_env(void *psp_env_base) -{ - int i, n; - char *name, *value; - struct psp_env_chunk *chunks = (struct psp_env_chunk *)psp_env_data; - - memcpy_fromio(chunks, psp_env_base, PSP_ENV_SIZE); - - i = 1; - n = PSP_ENV_SIZE / sizeof(struct psp_env_chunk); - while (i < n) { - if ((chunks[i].num == 0xff) || ((i + chunks[i].len) > n)) - break; - value = chunks[i].data; - if (chunks[i].num) { - name = lookup_psp_var_map(chunks[i].num); - } else { - name = value; - value += strlen(name) + 1; - } - if (name) - add_adam2_var(name, value); - i += chunks[i].len; - } - return 0; -} - -static void __init ar7_init_env(struct env_var *env) -{ - int i; - struct psbl_rec *psbl = (struct psbl_rec *)(KSEG1ADDR(0x14000300)); - void *psp_env = (void *)KSEG1ADDR(psbl->env_base); - - if (strcmp(psp_env, psp_env_version) == 0) { - parse_psp_env(psp_env); - } else { - for (i = 0; i < MAX_ENTRY; i++, env++) - if (env->name) - add_adam2_var(env->name, env->value); - } -} - -static void __init console_config(void) -{ -#ifdef CONFIG_SERIAL_8250_CONSOLE - char console_string[40]; - int baud = 0; - char parity = '\0', bits = '\0', flow = '\0'; - char *s, *p; - - if (strstr(arcs_cmdline, "console=")) - return; - - s = prom_getenv("modetty0"); - if (s) { - baud = simple_strtoul(s, &p, 10); - s = p; - if (*s == ',') - s++; - if (*s) - parity = *s++; - if (*s == ',') - s++; - if (*s) - bits = *s++; - if (*s == ',') - s++; - if (*s == 'h') - flow = 'r'; - } - - if (baud == 0) - baud = 38400; - if (parity != 'n' && parity != 'o' && parity != 'e') - parity = 'n'; - if (bits != '7' && bits != '8') - bits = '8'; - - if (flow == 'r') - sprintf(console_string, " console=ttyS0,%d%c%c%c", baud, - parity, bits, flow); - else - sprintf(console_string, " console=ttyS0,%d%c%c", baud, parity, - bits); - strlcat(arcs_cmdline, console_string, COMMAND_LINE_SIZE); -#endif -} - -void __init prom_init(void) -{ - ar7_init_cmdline(fw_arg0, (char **)fw_arg1); - ar7_init_env((struct env_var *)fw_arg2); - console_config(); -} - -#define PORT(offset) (KSEG1ADDR(AR7_REGS_UART0 + (offset * 4))) -static inline unsigned int serial_in(int offset) -{ - return readl((void *)PORT(offset)); -} - -static inline void serial_out(int offset, int value) -{ - writel(value, (void *)PORT(offset)); -} - -void prom_putchar(char c) -{ - while ((serial_in(UART_LSR) & UART_LSR_TEMT) == 0) - ; - serial_out(UART_TX, c); -} diff --git a/arch/mips/ar7/setup.c b/arch/mips/ar7/setup.c deleted file mode 100644 index 352d5dbc777c..000000000000 --- a/arch/mips/ar7/setup.c +++ /dev/null @@ -1,93 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. 
- */ -#include <linux/init.h> -#include <linux/ioport.h> -#include <linux/pm.h> -#include <linux/time.h> - -#include <asm/reboot.h> -#include <asm/mach-ar7/ar7.h> -#include <asm/mach-ar7/prom.h> - -static void ar7_machine_restart(char *command) -{ - u32 *softres_reg = ioremap(AR7_REGS_RESET + AR7_RESET_SOFTWARE, 1); - - writel(1, softres_reg); -} - -static void ar7_machine_halt(void) -{ - while (1) - ; -} - -static void ar7_machine_power_off(void) -{ - u32 *power_reg = (u32 *)ioremap(AR7_REGS_POWER, 1); - u32 power_state = readl(power_reg) | (3 << 30); - - writel(power_state, power_reg); - ar7_machine_halt(); -} - -const char *get_system_type(void) -{ - u16 chip_id = ar7_chip_id(); - u16 titan_variant_id = titan_chip_id(); - - switch (chip_id) { - case AR7_CHIP_7100: - return "TI AR7 (TNETD7100)"; - case AR7_CHIP_7200: - return "TI AR7 (TNETD7200)"; - case AR7_CHIP_7300: - return "TI AR7 (TNETD7300)"; - case AR7_CHIP_TITAN: - switch (titan_variant_id) { - case TITAN_CHIP_1050: - return "TI AR7 (TNETV1050)"; - case TITAN_CHIP_1055: - return "TI AR7 (TNETV1055)"; - case TITAN_CHIP_1056: - return "TI AR7 (TNETV1056)"; - case TITAN_CHIP_1060: - return "TI AR7 (TNETV1060)"; - } - fallthrough; - default: - return "TI AR7 (unknown)"; - } -} - -static int __init ar7_init_console(void) -{ - return 0; -} -console_initcall(ar7_init_console); - -/* - * Initializes basic routines and structures pointers, memory size (as - * given by the bios and saves the command line. - */ -void __init plat_mem_setup(void) -{ - unsigned long io_base; - - _machine_restart = ar7_machine_restart; - _machine_halt = ar7_machine_halt; - pm_power_off = ar7_machine_power_off; - - io_base = (unsigned long)ioremap(AR7_REGS_BASE, 0x10000); - if (!io_base) - panic("Can't remap IO base!"); - set_io_port_base(io_base); - - prom_meminit(); - - printk(KERN_INFO "%s, ID: 0x%04x, Revision: 0x%02x\n", - get_system_type(), ar7_chip_id(), ar7_chip_rev()); -} diff --git a/arch/mips/ar7/time.c b/arch/mips/ar7/time.c deleted file mode 100644 index 72aa77d7087b..000000000000 --- a/arch/mips/ar7/time.c +++ /dev/null @@ -1,31 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. - * - * Setting up the clock on the MIPS boards. 
- */ - -#include <linux/init.h> -#include <linux/time.h> -#include <linux/err.h> -#include <linux/clk.h> - -#include <asm/time.h> -#include <asm/mach-ar7/ar7.h> - -void __init plat_time_init(void) -{ - struct clk *cpu_clk; - - /* Initialize ar7 clocks so the CPU clock frequency is correct */ - ar7_init_clocks(); - - cpu_clk = clk_get(NULL, "cpu"); - if (IS_ERR(cpu_clk)) { - printk(KERN_ERR "unable to get cpu clock\n"); - return; - } - - mips_hpt_frequency = clk_get_rate(cpu_clk) / 2; -} diff --git a/arch/mips/boot/compressed/uart-16550.c b/arch/mips/boot/compressed/uart-16550.c index 96d28f211121..09dcd2c561d9 100644 --- a/arch/mips/boot/compressed/uart-16550.c +++ b/arch/mips/boot/compressed/uart-16550.c @@ -13,11 +13,6 @@ #define PORT(offset) (CKSEG1ADDR(UART_BASE) + (offset)) #endif -#ifdef CONFIG_AR7 -#include <ar7.h> -#define PORT(offset) (CKSEG1ADDR(AR7_REGS_UART0) + (4 * offset)) -#endif - #ifdef CONFIG_MACH_INGENIC #define INGENIC_UART_BASE_ADDR (0x10030000 + 0x1000 * CONFIG_ZBOOT_INGENIC_UART) #define PORT(offset) (CKSEG1ADDR(INGENIC_UART_BASE_ADDR) + (4 * offset)) diff --git a/arch/mips/boot/dts/ingenic/jz4725b.dtsi b/arch/mips/boot/dts/ingenic/jz4725b.dtsi index acbbe8c4664c..c5c5a094c37d 100644 --- a/arch/mips/boot/dts/ingenic/jz4725b.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4725b.dtsi @@ -366,7 +366,6 @@ rom: memory@1fc00000 { compatible = "mtd-rom"; - probe-type = "map_rom"; reg = <0x1fc00000 0x2000>; bank-width = <4>; diff --git a/arch/mips/boot/dts/ingenic/jz4770.dtsi b/arch/mips/boot/dts/ingenic/jz4770.dtsi index 9c0099919db7..504e895e916e 100644 --- a/arch/mips/boot/dts/ingenic/jz4770.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4770.dtsi @@ -461,7 +461,6 @@ rom: memory@1fc00000 { compatible = "mtd-rom"; - probe-type = "map_rom"; reg = <0x1fc00000 0x2000>; bank-width = <4>; diff --git a/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc1.dts b/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc1.dts index 129b6710b699..f9c262cc2e96 100644 --- a/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc1.dts +++ b/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc1.dts @@ -8,7 +8,7 @@ / { compatible = "gnubee,gb-pc1", "mediatek,mt7621-soc"; - model = "GB-PC1"; + model = "GnuBee GB-PC1"; memory@0 { device_type = "memory"; diff --git a/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc2.dts b/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc2.dts index f810cd10f4f4..b281e13f22ed 100644 --- a/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc2.dts +++ b/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc2.dts @@ -8,7 +8,7 @@ / { compatible = "gnubee,gb-pc2", "mediatek,mt7621-soc"; - model = "GB-PC2"; + model = "GnuBee GB-PC2"; memory@0 { device_type = "memory"; diff --git a/arch/mips/boot/dts/ralink/mt7621.dtsi b/arch/mips/boot/dts/ralink/mt7621.dtsi index 7caed0d14f11..35a10258f235 100644 --- a/arch/mips/boot/dts/ralink/mt7621.dtsi +++ b/arch/mips/boot/dts/ralink/mt7621.dtsi @@ -300,14 +300,13 @@ compatible = "mediatek,mt7621-eth"; reg = <0x1e100000 0x10000>; - clocks = <&sysc MT7621_CLK_FE>, - <&sysc MT7621_CLK_ETH>; + clocks = <&sysc MT7621_CLK_FE>, <&sysc MT7621_CLK_ETH>; clock-names = "fe", "ethif"; #address-cells = <1>; #size-cells = <0>; - resets = <&sysc MT7621_RST_FE &sysc MT7621_RST_ETH>; + resets = <&sysc MT7621_RST_FE>, <&sysc MT7621_RST_ETH>; reset-names = "fe", "eth"; interrupt-parent = <&gic>; diff --git a/arch/mips/configs/ar7_defconfig b/arch/mips/configs/ar7_defconfig deleted file mode 100644 index 329c60aa570a..000000000000 --- a/arch/mips/configs/ar7_defconfig +++ /dev/null @@ -1,119 +0,0 @@ -# 
CONFIG_LOCALVERSION_AUTO is not set -CONFIG_KERNEL_LZMA=y -CONFIG_SYSVIPC=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_RELAY=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_EXPERT=y -# CONFIG_ELF_CORE is not set -# CONFIG_KALLSYMS is not set -# CONFIG_VM_EVENT_COUNTERS is not set -# CONFIG_COMPAT_BRK is not set -CONFIG_AR7=y -CONFIG_HZ_100=y -CONFIG_KEXEC=y -# CONFIG_SECCOMP is not set -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set -CONFIG_PARTITION_ADVANCED=y -CONFIG_BSD_DISKLABEL=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_MROUTE=y -CONFIG_SYN_COOKIES=y -# CONFIG_INET_DIAG is not set -CONFIG_TCP_CONG_ADVANCED=y -# CONFIG_TCP_CONG_BIC is not set -# CONFIG_TCP_CONG_CUBIC is not set -CONFIG_TCP_CONG_WESTWOOD=y -# CONFIG_TCP_CONG_HTCP is not set -# CONFIG_IPV6 is not set -CONFIG_NETFILTER=y -# CONFIG_BRIDGE_NETFILTER is not set -CONFIG_NF_CONNTRACK=m -CONFIG_NF_CONNTRACK_MARK=y -CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_RAW=m -CONFIG_ATM=m -CONFIG_ATM_BR2684=m -CONFIG_ATM_BR2684_IPFILTER=y -CONFIG_BRIDGE=y -CONFIG_VLAN_8021Q=y -CONFIG_NET_SCHED=y -CONFIG_NET_CLS_ACT=y -CONFIG_NET_ACT_POLICE=y -CONFIG_HAMRADIO=y -CONFIG_CFG80211=m -CONFIG_MAC80211=m -CONFIG_MTD=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_CFI=y -CONFIG_MTD_CFI_INTELEXT=y -CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_CFI_STAA=y -CONFIG_MTD_COMPLEX_MAPPINGS=y -CONFIG_MTD_PHYSMAP=y -CONFIG_NETDEVICES=y -CONFIG_CPMAC=y -CONFIG_FIXED_PHY=y -CONFIG_PPP=m -CONFIG_PPP_FILTER=y -CONFIG_PPP_MULTILINK=y -CONFIG_PPPOATM=m -CONFIG_PPPOE=m -CONFIG_PPP_ASYNC=m -# CONFIG_INPUT is not set -# CONFIG_SERIO is not set -# CONFIG_VT is not set -# CONFIG_LEGACY_PTYS is not set -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_NR_UARTS=2 -CONFIG_SERIAL_8250_RUNTIME_UARTS=2 -CONFIG_HW_RANDOM=y -CONFIG_GPIO_SYSFS=y -# CONFIG_HWMON is not set -CONFIG_WATCHDOG=y -CONFIG_AR7_WDT=y -# CONFIG_USB_SUPPORT is not set -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=y -CONFIG_LEDS_GPIO=y -CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_TIMER=y -CONFIG_LEDS_TRIGGER_HEARTBEAT=y -CONFIG_LEDS_TRIGGER_DEFAULT_ON=y -# CONFIG_DNOTIFY is not set -CONFIG_PROC_KCORE=y -# CONFIG_PROC_PAGE_MONITOR is not set -CONFIG_TMPFS=y -CONFIG_JFFS2_FS=y -CONFIG_JFFS2_SUMMARY=y -CONFIG_JFFS2_COMPRESSION_OPTIONS=y -CONFIG_SQUASHFS=y -# CONFIG_CRYPTO_HW is not set -CONFIG_STRIP_ASM_SYMS=y -CONFIG_DEBUG_FS=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="rootfstype=squashfs,jffs2" diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig index 1843468f84a3..00329bb5de5a 100644 --- a/arch/mips/configs/fuloong2e_defconfig +++ b/arch/mips/configs/fuloong2e_defconfig @@ -177,7 +177,6 @@ CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_REISERFS_FS=m CONFIG_AUTOFS_FS=y CONFIG_FUSE_FS=y CONFIG_ISO9660_FS=m diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig index fdf374574105..65adb538030d 100644 --- a/arch/mips/configs/jazz_defconfig +++ 
b/arch/mips/configs/jazz_defconfig @@ -70,10 +70,6 @@ CONFIG_FRAMEBUFFER_CONSOLE=y # CONFIG_HWMON is not set CONFIG_EXT2_FS=m CONFIG_EXT3_FS=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_AUTOFS_FS=m diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig index 83d9a8ff4270..38f17b658421 100644 --- a/arch/mips/configs/lemote2f_defconfig +++ b/arch/mips/configs/lemote2f_defconfig @@ -229,9 +229,6 @@ CONFIG_EXT2_FS=m CONFIG_EXT3_FS=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_XFS_FS=m diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index ae1a7793e810..6f8046024557 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -317,11 +317,6 @@ CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig index c07e30f63d8b..16a91eeff67f 100644 --- a/arch/mips/configs/malta_kvm_defconfig +++ b/arch/mips/configs/malta_kvm_defconfig @@ -323,11 +323,6 @@ CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig index 0a5701020d3f..264aba29ea4f 100644 --- a/arch/mips/configs/maltaup_xpa_defconfig +++ b/arch/mips/configs/maltaup_xpa_defconfig @@ -323,11 +323,6 @@ CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig index 5c5e2186210c..08e1c1f2f4de 100644 --- a/arch/mips/configs/rm200_defconfig +++ b/arch/mips/configs/rm200_defconfig @@ -310,10 +310,6 @@ CONFIG_USB_LD=m CONFIG_USB_TEST=m CONFIG_EXT2_FS=m CONFIG_EXT3_FS=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_AUTOFS_FS=m diff --git a/arch/mips/include/asm/kprobes.h b/arch/mips/include/asm/kprobes.h index 68b1e5d458cf..bc27d99c9436 100644 --- a/arch/mips/include/asm/kprobes.h +++ b/arch/mips/include/asm/kprobes.h @@ -71,8 +71,6 @@ struct kprobe_ctlblk { struct prev_kprobe prev_kprobe; }; -extern int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); #endif /* CONFIG_KPROBES */ #endif /* _ASM_KPROBES_H */ diff --git a/arch/mips/include/asm/mach-ar7/ar7.h b/arch/mips/include/asm/mach-ar7/ar7.h deleted file mode 100644 index 1e8621a6afa3..000000000000 --- a/arch/mips/include/asm/mach-ar7/ar7.h +++ /dev/null @@ -1,191 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2006,2007 Felix Fietkau <nbd@openwrt.org> - * Copyright (C) 2006,2007 Eugene Konev 
<ejka@openwrt.org> - */ - -#ifndef __AR7_H__ -#define __AR7_H__ - -#include <linux/delay.h> -#include <linux/io.h> -#include <linux/errno.h> - -#include <asm/addrspace.h> - -#define AR7_SDRAM_BASE 0x14000000 - -#define AR7_REGS_BASE 0x08610000 - -#define AR7_REGS_MAC0 (AR7_REGS_BASE + 0x0000) -#define AR7_REGS_GPIO (AR7_REGS_BASE + 0x0900) -/* 0x08610A00 - 0x08610BFF (512 bytes, 128 bytes / clock) */ -#define AR7_REGS_POWER (AR7_REGS_BASE + 0x0a00) -#define AR7_REGS_CLOCKS (AR7_REGS_POWER + 0x80) -#define UR8_REGS_CLOCKS (AR7_REGS_POWER + 0x20) -#define AR7_REGS_UART0 (AR7_REGS_BASE + 0x0e00) -#define AR7_REGS_USB (AR7_REGS_BASE + 0x1200) -#define AR7_REGS_RESET (AR7_REGS_BASE + 0x1600) -#define AR7_REGS_PINSEL (AR7_REGS_BASE + 0x160C) -#define AR7_REGS_VLYNQ0 (AR7_REGS_BASE + 0x1800) -#define AR7_REGS_DCL (AR7_REGS_BASE + 0x1a00) -#define AR7_REGS_VLYNQ1 (AR7_REGS_BASE + 0x1c00) -#define AR7_REGS_MDIO (AR7_REGS_BASE + 0x1e00) -#define AR7_REGS_IRQ (AR7_REGS_BASE + 0x2400) -#define AR7_REGS_MAC1 (AR7_REGS_BASE + 0x2800) - -#define AR7_REGS_WDT (AR7_REGS_BASE + 0x1f00) -#define UR8_REGS_WDT (AR7_REGS_BASE + 0x0b00) -#define UR8_REGS_UART1 (AR7_REGS_BASE + 0x0f00) - -/* Titan registers */ -#define TITAN_REGS_ESWITCH_BASE (0x08640000) -#define TITAN_REGS_MAC0 (TITAN_REGS_ESWITCH_BASE) -#define TITAN_REGS_MAC1 (TITAN_REGS_ESWITCH_BASE + 0x0800) -#define TITAN_REGS_MDIO (TITAN_REGS_ESWITCH_BASE + 0x02000) -#define TITAN_REGS_VLYNQ0 (AR7_REGS_BASE + 0x1c00) -#define TITAN_REGS_VLYNQ1 (AR7_REGS_BASE + 0x1300) - -#define AR7_RESET_PERIPHERAL 0x0 -#define AR7_RESET_SOFTWARE 0x4 -#define AR7_RESET_STATUS 0x8 - -#define AR7_RESET_BIT_CPMAC_LO 17 -#define AR7_RESET_BIT_CPMAC_HI 21 -#define AR7_RESET_BIT_MDIO 22 -#define AR7_RESET_BIT_EPHY 26 - -#define TITAN_RESET_BIT_EPHY1 28 - -/* GPIO control registers */ -#define AR7_GPIO_INPUT 0x0 -#define AR7_GPIO_OUTPUT 0x4 -#define AR7_GPIO_DIR 0x8 -#define AR7_GPIO_ENABLE 0xc -#define TITAN_GPIO_INPUT_0 0x0 -#define TITAN_GPIO_INPUT_1 0x4 -#define TITAN_GPIO_OUTPUT_0 0x8 -#define TITAN_GPIO_OUTPUT_1 0xc -#define TITAN_GPIO_DIR_0 0x10 -#define TITAN_GPIO_DIR_1 0x14 -#define TITAN_GPIO_ENBL_0 0x18 -#define TITAN_GPIO_ENBL_1 0x1c - -#define AR7_CHIP_7100 0x18 -#define AR7_CHIP_7200 0x2b -#define AR7_CHIP_7300 0x05 -#define AR7_CHIP_TITAN 0x07 -#define TITAN_CHIP_1050 0x0f -#define TITAN_CHIP_1055 0x0e -#define TITAN_CHIP_1056 0x0d -#define TITAN_CHIP_1060 0x07 - -/* Interrupts */ -#define AR7_IRQ_UART0 15 -#define AR7_IRQ_UART1 16 - -/* Clocks */ -#define AR7_AFE_CLOCK 35328000 -#define AR7_REF_CLOCK 25000000 -#define AR7_XTAL_CLOCK 24000000 - -/* DCL */ -#define AR7_WDT_HW_ENA 0x10 - -struct plat_cpmac_data { - int reset_bit; - int power_bit; - u32 phy_mask; - char dev_addr[6]; -}; - -struct plat_dsl_data { - int reset_bit_dsl; - int reset_bit_sar; -}; - -static inline int ar7_is_titan(void) -{ - return (readl((void *)KSEG1ADDR(AR7_REGS_GPIO + 0x24)) & 0xffff) == - AR7_CHIP_TITAN; -} - -static inline u16 ar7_chip_id(void) -{ - return ar7_is_titan() ? AR7_CHIP_TITAN : (readl((void *) - KSEG1ADDR(AR7_REGS_GPIO + 0x14)) & 0xffff); -} - -static inline u16 titan_chip_id(void) -{ - unsigned int val = readl((void *)KSEG1ADDR(AR7_REGS_GPIO + - TITAN_GPIO_INPUT_1)); - return ((val >> 12) & 0x0f); -} - -static inline u8 ar7_chip_rev(void) -{ - return (readl((void *)KSEG1ADDR(AR7_REGS_GPIO + (ar7_is_titan() ? 
0x24 : - 0x14))) >> 16) & 0xff; -} - -static inline int ar7_has_high_cpmac(void) -{ - u16 chip_id = ar7_chip_id(); - switch (chip_id) { - case AR7_CHIP_7100: - case AR7_CHIP_7200: - return 0; - case AR7_CHIP_7300: - return 1; - default: - return -ENXIO; - } -} -#define ar7_has_high_vlynq ar7_has_high_cpmac -#define ar7_has_second_uart ar7_has_high_cpmac - -static inline void ar7_device_enable(u32 bit) -{ - void *reset_reg = - (void *)KSEG1ADDR(AR7_REGS_RESET + AR7_RESET_PERIPHERAL); - writel(readl(reset_reg) | (1 << bit), reset_reg); - msleep(20); -} - -static inline void ar7_device_disable(u32 bit) -{ - void *reset_reg = - (void *)KSEG1ADDR(AR7_REGS_RESET + AR7_RESET_PERIPHERAL); - writel(readl(reset_reg) & ~(1 << bit), reset_reg); - msleep(20); -} - -static inline void ar7_device_reset(u32 bit) -{ - ar7_device_disable(bit); - ar7_device_enable(bit); -} - -static inline void ar7_device_on(u32 bit) -{ - void *power_reg = (void *)KSEG1ADDR(AR7_REGS_POWER); - writel(readl(power_reg) | (1 << bit), power_reg); - msleep(20); -} - -static inline void ar7_device_off(u32 bit) -{ - void *power_reg = (void *)KSEG1ADDR(AR7_REGS_POWER); - writel(readl(power_reg) & ~(1 << bit), power_reg); - msleep(20); -} - -int __init ar7_gpio_init(void); -void __init ar7_init_clocks(void); - -/* Board specific GPIO functions */ -int ar7_gpio_enable(unsigned gpio); -int ar7_gpio_disable(unsigned gpio); - -#endif /* __AR7_H__ */ diff --git a/arch/mips/include/asm/mach-ar7/irq.h b/arch/mips/include/asm/mach-ar7/irq.h deleted file mode 100644 index 46bb730ea970..000000000000 --- a/arch/mips/include/asm/mach-ar7/irq.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Shamelessly copied from asm-mips/mach-emma2rh/ - * Copyright (C) 2003 by Ralf Baechle - */ -#ifndef __ASM_AR7_IRQ_H -#define __ASM_AR7_IRQ_H - -#define NR_IRQS 256 - -#include <asm/mach-generic/irq.h> - -#endif /* __ASM_AR7_IRQ_H */ diff --git a/arch/mips/include/asm/mach-ar7/prom.h b/arch/mips/include/asm/mach-ar7/prom.h deleted file mode 100644 index 9e1d20b06f57..000000000000 --- a/arch/mips/include/asm/mach-ar7/prom.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2006, 2007 Florian Fainelli <florian@openwrt.org> - */ - -#ifndef __PROM_H__ -#define __PROM_H__ - -extern char *prom_getenv(const char *name); -extern void prom_meminit(void); - -#endif /* __PROM_H__ */ diff --git a/arch/mips/include/asm/mach-ar7/spaces.h b/arch/mips/include/asm/mach-ar7/spaces.h deleted file mode 100644 index a004d94dfbdd..000000000000 --- a/arch/mips/include/asm/mach-ar7/spaces.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1994 - 1999, 2000, 03, 04 Ralf Baechle - * Copyright (C) 2000, 2002 Maciej W. Rozycki - * Copyright (C) 1990, 1999, 2000 Silicon Graphics, Inc. - */ -#ifndef _ASM_AR7_SPACES_H -#define _ASM_AR7_SPACES_H - -/* - * This handles the memory map. - * We handle pages at KSEG0 for kernels with 32 bit address space. 
- */ -#define PAGE_OFFSET _AC(0x94000000, UL) -#define PHYS_OFFSET _AC(0x14000000, UL) - -#include <asm/mach-generic/spaces.h> - -#endif /* __ASM_AR7_SPACES_H */ diff --git a/arch/mips/include/asm/mach-loongson32/dma.h b/arch/mips/include/asm/mach-loongson32/dma.h deleted file mode 100644 index e917b3ccb2c2..000000000000 --- a/arch/mips/include/asm/mach-loongson32/dma.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2015 Zhang, Keguang <keguang.zhang@gmail.com> - * - * Loongson 1 NAND platform support. - */ - -#ifndef __ASM_MACH_LOONGSON32_DMA_H -#define __ASM_MACH_LOONGSON32_DMA_H - -#define LS1X_DMA_CHANNEL0 0 -#define LS1X_DMA_CHANNEL1 1 -#define LS1X_DMA_CHANNEL2 2 - -struct plat_ls1x_dma { - int nr_channels; -}; - -extern struct plat_ls1x_dma ls1b_dma_pdata; - -#endif /* __ASM_MACH_LOONGSON32_DMA_H */ diff --git a/arch/mips/include/asm/mach-loongson32/nand.h b/arch/mips/include/asm/mach-loongson32/nand.h deleted file mode 100644 index aaf5ed19d78d..000000000000 --- a/arch/mips/include/asm/mach-loongson32/nand.h +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2015 Zhang, Keguang <keguang.zhang@gmail.com> - * - * Loongson 1 NAND platform support. - */ - -#ifndef __ASM_MACH_LOONGSON32_NAND_H -#define __ASM_MACH_LOONGSON32_NAND_H - -#include <linux/dmaengine.h> -#include <linux/mtd/partitions.h> - -struct plat_ls1x_nand { - struct mtd_partition *parts; - unsigned int nr_parts; - - int hold_cycle; - int wait_cycle; -}; - -extern struct plat_ls1x_nand ls1b_nand_pdata; - -bool ls1x_dma_filter_fn(struct dma_chan *chan, void *param); - -#endif /* __ASM_MACH_LOONGSON32_NAND_H */ diff --git a/arch/mips/include/asm/mach-loongson32/platform.h b/arch/mips/include/asm/mach-loongson32/platform.h index 2cdcfb5f6012..f74292b13bc3 100644 --- a/arch/mips/include/asm/mach-loongson32/platform.h +++ b/arch/mips/include/asm/mach-loongson32/platform.h @@ -8,9 +8,6 @@ #include <linux/platform_device.h> -#include <dma.h> -#include <nand.h> - extern struct platform_device ls1x_uart_pdev; extern struct platform_device ls1x_eth0_pdev; extern struct platform_device ls1x_eth1_pdev; diff --git a/arch/mips/kernel/relocate_kernel.S b/arch/mips/kernel/relocate_kernel.S index f5b2ef979b43..8f0a7263a9d6 100644 --- a/arch/mips/kernel/relocate_kernel.S +++ b/arch/mips/kernel/relocate_kernel.S @@ -66,7 +66,6 @@ copy_word: LONG_ADDIU s6, s6, -1 beq s6, zero, process_entry b copy_word - b process_entry done: #ifdef CONFIG_SMP diff --git a/arch/mips/loongson32/common/platform.c b/arch/mips/loongson32/common/platform.c index 8075590a9f83..623eb4bc7b41 100644 --- a/arch/mips/loongson32/common/platform.c +++ b/arch/mips/loongson32/common/platform.c @@ -15,8 +15,6 @@ #include <platform.h> #include <loongson1.h> -#include <dma.h> -#include <nand.h> /* 8250/16550 compatible UART */ #define LS1X_UART(_id) \ diff --git a/arch/mips/loongson32/ls1b/board.c b/arch/mips/loongson32/ls1b/board.c index fed8d432ef20..fe115bdcb22c 100644 --- a/arch/mips/loongson32/ls1b/board.c +++ b/arch/mips/loongson32/ls1b/board.c @@ -8,8 +8,6 @@ #include <linux/sizes.h> #include <loongson1.h> -#include <dma.h> -#include <nand.h> #include <platform.h> static const struct gpio_led ls1x_gpio_leds[] __initconst = { diff --git a/arch/mips/pci/fixup-lantiq.c b/arch/mips/pci/fixup-lantiq.c index 105569c1b712..13009666204f 100644 --- a/arch/mips/pci/fixup-lantiq.c +++ b/arch/mips/pci/fixup-lantiq.c @@ -4,8 +4,8 @@ * Copyright (C) 2012 John Crispin 
<john@phrozen.org> */ -#include <linux/of_irq.h> #include <linux/of_pci.h> +#include <linux/pci.h> int (*ltq_pci_plat_arch_init)(struct pci_dev *dev) = NULL; int (*ltq_pci_plat_dev_init)(struct pci_dev *dev) = NULL; diff --git a/arch/powerpc/include/asm/fb.h b/arch/powerpc/include/asm/fb.h index 3cecf14d51de..c0c5d1df7ad1 100644 --- a/arch/powerpc/include/asm/fb.h +++ b/arch/powerpc/include/asm/fb.h @@ -8,12 +8,7 @@ static inline pgprot_t pgprot_framebuffer(pgprot_t prot, unsigned long vm_start, unsigned long vm_end, unsigned long offset) { - /* - * PowerPC's implementation of phys_mem_access_prot() does - * not use the file argument. Set it to NULL in preparation - * of later updates to the interface. - */ - return phys_mem_access_prot(NULL, PHYS_PFN(offset), vm_end - vm_start, prot); + return __phys_mem_access_prot(PHYS_PFN(offset), vm_end - vm_start, prot); } #define pgprot_framebuffer pgprot_framebuffer diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index c8e4b4fd4e33..4525a9c68260 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -84,8 +84,6 @@ struct arch_optimized_insn { kprobe_opcode_t *insn; }; -extern int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); extern int kprobe_handler(struct pt_regs *regs); extern int kprobe_post_handler(struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 4f6e7d7ee388..d31a5ec1550d 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -10,7 +10,7 @@ #include <linux/export.h> struct pt_regs; -struct pci_bus; +struct pci_bus; struct device_node; struct iommu_table; struct rtc_time; @@ -78,8 +78,8 @@ struct machdep_calls { unsigned char (*nvram_read_val)(int addr); void (*nvram_write_val)(int addr, unsigned char val); ssize_t (*nvram_write)(char *buf, size_t count, loff_t *index); - ssize_t (*nvram_read)(char *buf, size_t count, loff_t *index); - ssize_t (*nvram_size)(void); + ssize_t (*nvram_read)(char *buf, size_t count, loff_t *index); + ssize_t (*nvram_size)(void); void (*nvram_sync)(void); /* Exception handlers */ @@ -102,12 +102,11 @@ struct machdep_calls { */ long (*feature_call)(unsigned int feature, ...); - /* Get legacy PCI/IDE interrupt mapping */ + /* Get legacy PCI/IDE interrupt mapping */ int (*pci_get_legacy_ide_irq)(struct pci_dev *dev, int channel); - + /* Get access protection for /dev/mem */ - pgprot_t (*phys_mem_access_prot)(struct file *file, - unsigned long pfn, + pgprot_t (*phys_mem_access_prot)(unsigned long pfn, unsigned long size, pgprot_t vma_prot); diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index f5078a7dd85a..46a9c4491ed0 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -105,9 +105,7 @@ extern void of_scan_pci_bridge(struct pci_dev *dev); extern void of_scan_bus(struct device_node *node, struct pci_bus *bus); extern void of_rescan_bus(struct device_node *node, struct pci_bus *bus); -struct file; -extern pgprot_t pci_phys_mem_access_prot(struct file *file, - unsigned long pfn, +extern pgprot_t pci_phys_mem_access_prot(unsigned long pfn, unsigned long size, pgprot_t prot); diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 2bfb7dd3b49e..9224f23065ff 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ 
-120,9 +120,15 @@ static inline void mark_initmem_nx(void) { } int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t entry, int dirty); +pgprot_t __phys_mem_access_prot(unsigned long pfn, unsigned long size, + pgprot_t vma_prot); + struct file; -pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t vma_prot); +static inline pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot) +{ + return __phys_mem_access_prot(pfn, size, vma_prot); +} #define __HAVE_PHYS_MEM_ACCESS_PROT void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 3e28579f7c62..ebe259bdd462 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1280,13 +1280,19 @@ struct iommu_table_group_ops spapr_tce_table_group_ops = { /* * A simple iommu_ops to allow less cruft in generic VFIO code. */ -static int spapr_tce_blocking_iommu_attach_dev(struct iommu_domain *dom, - struct device *dev) +static int +spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain, + struct device *dev) { + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct iommu_group *grp = iommu_group_get(dev); struct iommu_table_group *table_group; int ret = -EINVAL; + /* At first attach the ownership is already set */ + if (!domain) + return 0; + if (!grp) return -ENODEV; @@ -1297,17 +1303,22 @@ static int spapr_tce_blocking_iommu_attach_dev(struct iommu_domain *dom, return ret; } -static void spapr_tce_blocking_iommu_set_platform_dma(struct device *dev) -{ - struct iommu_group *grp = iommu_group_get(dev); - struct iommu_table_group *table_group; +static const struct iommu_domain_ops spapr_tce_platform_domain_ops = { + .attach_dev = spapr_tce_platform_iommu_attach_dev, +}; - table_group = iommu_group_get_iommudata(grp); - table_group->ops->release_ownership(table_group); -} +static struct iommu_domain spapr_tce_platform_domain = { + .type = IOMMU_DOMAIN_PLATFORM, + .ops = &spapr_tce_platform_domain_ops, +}; -static const struct iommu_domain_ops spapr_tce_blocking_domain_ops = { - .attach_dev = spapr_tce_blocking_iommu_attach_dev, +static struct iommu_domain spapr_tce_blocked_domain = { + .type = IOMMU_DOMAIN_BLOCKED, + /* + * FIXME: SPAPR mixes blocked and platform behaviors, the blocked domain + * also sets the dma_api ops + */ + .ops = &spapr_tce_platform_domain_ops, }; static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap) @@ -1322,22 +1333,6 @@ static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap) return false; } -static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type) -{ - struct iommu_domain *dom; - - if (type != IOMMU_DOMAIN_BLOCKED) - return NULL; - - dom = kzalloc(sizeof(*dom), GFP_KERNEL); - if (!dom) - return NULL; - - dom->ops = &spapr_tce_blocking_domain_ops; - - return dom; -} - static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev) { struct pci_dev *pdev; @@ -1371,12 +1366,12 @@ static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev) } static const struct iommu_ops spapr_tce_iommu_ops = { + .default_domain = &spapr_tce_platform_domain, + .blocked_domain = &spapr_tce_blocked_domain, .capable = spapr_tce_iommu_capable, - .domain_alloc = spapr_tce_iommu_domain_alloc, .probe_device = spapr_tce_iommu_probe_device, .release_device = 
spapr_tce_iommu_release_device, .device_group = spapr_tce_iommu_device_group, - .set_platform_dma_ops = spapr_tce_blocking_iommu_set_platform_dma, }; static struct attribute *spapr_tce_iommu_attrs[] = { diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 040255ddb569..d95a48eff412 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -521,8 +521,7 @@ int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma) * PCI device, it tries to find the PCI device first and calls the * above routine */ -pgprot_t pci_phys_mem_access_prot(struct file *file, - unsigned long pfn, +pgprot_t pci_phys_mem_access_prot(unsigned long pfn, unsigned long size, pgprot_t prot) { diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index 9454b8395b6a..f38df72e64b8 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -752,6 +752,8 @@ static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v) /** * ppc_rtas_rmo_buf_show() - Describe RTAS-addressable region for user space. + * @m: seq_file output target. + * @v: Unused. * * Base + size description of a range of RTAS-addressable memory set * aside for user space to use as work area(s) for certain RTAS diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 1717554b04b1..3a440004b97d 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -35,18 +35,18 @@ unsigned long long memory_limit; unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); -pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t vma_prot) +pgprot_t __phys_mem_access_prot(unsigned long pfn, unsigned long size, + pgprot_t vma_prot) { if (ppc_md.phys_mem_access_prot) - return ppc_md.phys_mem_access_prot(file, pfn, size, vma_prot); + return ppc_md.phys_mem_access_prot(pfn, size, vma_prot); if (!page_is_ram(pfn)) vma_prot = pgprot_noncached(vma_prot); return vma_prot; } -EXPORT_SYMBOL(phys_mem_access_prot); +EXPORT_SYMBOL(__phys_mem_access_prot); #ifdef CONFIG_MEMORY_HOTPLUG static DEFINE_MUTEX(linear_mapping_mutex); diff --git a/arch/powerpc/platforms/pseries/rtas-work-area.c b/arch/powerpc/platforms/pseries/rtas-work-area.c index b37d52f40360..7fe34bee84d8 100644 --- a/arch/powerpc/platforms/pseries/rtas-work-area.c +++ b/arch/powerpc/platforms/pseries/rtas-work-area.c @@ -184,6 +184,7 @@ machine_arch_initcall(pseries, rtas_work_area_allocator_init); /** * rtas_work_area_reserve_arena() - Reserve memory suitable for RTAS work areas. + * @limit: Upper limit for memblock allocation. 
*/ void __init rtas_work_area_reserve_arena(const phys_addr_t limit) { diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index eaa15a20e6ae..95a2a06acc6a 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -39,6 +39,7 @@ config RISCV select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAS_VDSO_DATA + select ARCH_KEEP_MEMBLOCK if ACPI select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT select ARCH_STACKWALK @@ -48,6 +49,7 @@ config RISCV select ARCH_SUPPORTS_HUGETLBFS if MMU select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU select ARCH_SUPPORTS_PER_VMA_LOCK if MMU + select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_USES_CFI_TRAPS if CFI_CLANG @@ -174,6 +176,11 @@ config GCC_SUPPORTS_DYNAMIC_FTRACE def_bool CC_IS_GCC depends on $(cc-option,-fpatchable-function-entry=8) +config HAVE_SHADOW_CALL_STACK + def_bool $(cc-option,-fsanitize=shadow-call-stack) + # https://github.com/riscv-non-isa/riscv-elf-psabi-doc/commit/a484e843e6eeb51f0cb7b8819e50da6d2444d769 + depends on $(ld-option,--no-relax-gp) + config ARCH_MMAP_RND_BITS_MIN default 18 if 64BIT default 8 @@ -635,6 +642,15 @@ config THREAD_SIZE_ORDER Specify the Pages of thread stack size (from 4KB to 64KB), which also affects irq stack size, which is equal to thread stack size. +config RISCV_MISALIGNED + bool "Support misaligned load/store traps for kernel and userspace" + select SYSCTL_ARCH_UNALIGN_ALLOW + default y + help + Say Y here if you want the kernel to embed support for misaligned + load/store for both kernel and userspace. When disabled, misaligned + accesses will generate SIGBUS in userspace and panic in the kernel. 
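The option pairs with the generic per-task unaligned-access control (note the SYSCTL_ARCH_UNALIGN_ALLOW select and the unaligned_ctl hooks added elsewhere in this series), so userspace can opt out of emulation. A minimal userspace sketch, assuming a kernel built with RISCV_MISALIGNED=y; the misaligned load at the end is purely illustrative:

    #include <stdio.h>
    #include <string.h>
    #include <sys/prctl.h>
    #include <linux/prctl.h>

    int main(void)
    {
            /* Ask for SIGBUS instead of kernel emulation of this
             * task's misaligned accesses. */
            if (prctl(PR_SET_UNALIGN, PR_UNALIGN_SIGBUS) != 0) {
                    perror("PR_SET_UNALIGN");
                    return 1;
            }

            char buf[2 * sizeof(long)];
            memset(buf, 0, sizeof(buf));

            /* Deliberately misaligned load: with PR_UNALIGN_SIGBUS in
             * effect this may raise SIGBUS rather than be emulated. */
            long v = *(long *)(buf + 1);

            printf("loaded %ld\n", v);
            return 0;
    }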
+ endmenu # "Platform type" menu "Kernel features" @@ -902,6 +918,9 @@ config PORTABLE select MMU select OF +config ARCH_PROC_KCORE_TEXT + def_bool y + menu "Power management options" source "kernel/power/Kconfig" diff --git a/arch/riscv/Kconfig.debug b/arch/riscv/Kconfig.debug index e69de29bb2d1..eafe17ebf710 100644 --- a/arch/riscv/Kconfig.debug +++ b/arch/riscv/Kconfig.debug @@ -0,0 +1 @@ +source "arch/riscv/kernel/tests/Kconfig.debug" diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 4d06f3402674..a74be78678eb 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -54,6 +54,10 @@ endif endif endif +ifeq ($(CONFIG_SHADOW_CALL_STACK),y) + KBUILD_LDFLAGS += --no-relax-gp +endif + # ISA string setting riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index 22b13947bd13..8e7fc0edf21d 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -17,6 +17,7 @@ KCOV_INSTRUMENT := n OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S +OBJCOPYFLAGS_loader.bin :=-O binary OBJCOPYFLAGS_xipImage :=-O binary -R .note -R .note.gnu.build-id -R .comment -S targets := Image Image.* loader loader.o loader.lds loader.bin diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 1edf3cd886c5..905881282a7c 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -37,6 +37,13 @@ CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y CONFIG_PM=y CONFIG_CPU_IDLE=y +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_STAT=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=m +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m +CONFIG_CPUFREQ_DT=y CONFIG_VIRTUALIZATION=y CONFIG_KVM=m CONFIG_ACPI=y @@ -95,6 +102,7 @@ CONFIG_NETLINK_DIAG=y CONFIG_CGROUP_NET_PRIO=y CONFIG_NET_9P=y CONFIG_NET_9P_VIRTIO=y +CONFIG_CAN=m CONFIG_PCI=y CONFIG_PCIEPORTBUS=y CONFIG_PCI_HOST_GENERIC=y @@ -102,6 +110,11 @@ CONFIG_PCIE_XILINX=y CONFIG_PCIE_FU740=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y +CONFIG_MTD=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_CFI=y +CONFIG_MTD_CFI_ADV_OPTIONS=y +CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_LOOP=y CONFIG_VIRTIO_BLK=y CONFIG_BLK_DEV_NVME=m @@ -124,8 +137,11 @@ CONFIG_VIRTIO_NET=y CONFIG_MACB=y CONFIG_E1000E=y CONFIG_R8169=y +CONFIG_RAVB=y CONFIG_STMMAC_ETH=m +CONFIG_MICREL_PHY=y CONFIG_MICROSEMI_PHY=y +CONFIG_CAN_RCAR_CANFD=m CONFIG_INPUT_MOUSEDEV=y CONFIG_KEYBOARD_SUN4I_LRADC=m CONFIG_SERIAL_8250=y @@ -136,16 +152,24 @@ CONFIG_SERIAL_SH_SCI=y CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y +CONFIG_I2C_CHARDEV=m CONFIG_I2C_MV64XXX=m +CONFIG_I2C_RIIC=y CONFIG_SPI=y +CONFIG_SPI_RSPI=m CONFIG_SPI_SIFIVE=y CONFIG_SPI_SUN6I=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_GPIO_SIFIVE=y +CONFIG_CPU_THERMAL=y +CONFIG_DEVFREQ_THERMAL=y +CONFIG_RZG2L_THERMAL=y CONFIG_WATCHDOG=y CONFIG_SUNXI_WATCHDOG=y +CONFIG_RENESAS_RZG2LWDT=y CONFIG_REGULATOR=y CONFIG_REGULATOR_FIXED_VOLTAGE=y +CONFIG_REGULATOR_GPIO=y CONFIG_DRM=m CONFIG_DRM_RADEON=m CONFIG_DRM_NOUVEAU=m @@ -153,39 +177,69 @@ CONFIG_DRM_SUN4I=m CONFIG_DRM_VIRTIO_GPU=m CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_SND_SOC=y +CONFIG_SND_SOC_RZ=m +CONFIG_SND_SOC_WM8978=m +CONFIG_SND_SIMPLE_CARD=m CONFIG_USB=y +CONFIG_USB_OTG=y CONFIG_USB_XHCI_HCD=y CONFIG_USB_XHCI_PLATFORM=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_HCD_PLATFORM=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_RENESAS_USBHS=m CONFIG_USB_STORAGE=y 
CONFIG_USB_UAS=y CONFIG_USB_MUSB_HDRC=m CONFIG_USB_MUSB_SUNXI=m CONFIG_NOP_USB_XCEIV=m +CONFIG_USB_GADGET=y +CONFIG_USB_RENESAS_USBHS_UDC=m +CONFIG_USB_CONFIGFS=m +CONFIG_USB_CONFIGFS_SERIAL=y +CONFIG_USB_CONFIGFS_ACM=y +CONFIG_USB_CONFIGFS_OBEX=y +CONFIG_USB_CONFIGFS_NCM=y +CONFIG_USB_CONFIGFS_ECM=y +CONFIG_USB_CONFIGFS_ECM_SUBSET=y +CONFIG_USB_CONFIGFS_RNDIS=y +CONFIG_USB_CONFIGFS_EEM=y +CONFIG_USB_CONFIGFS_MASS_STORAGE=y +CONFIG_USB_CONFIGFS_F_FS=y CONFIG_MMC=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SDHCI_CADENCE=y CONFIG_MMC_SPI=y +CONFIG_MMC_DW=y +CONFIG_MMC_DW_STARFIVE=y +CONFIG_MMC_SDHI=y CONFIG_MMC_SUNXI=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SUN6I=y CONFIG_DMADEVICES=y CONFIG_DMA_SUN6I=m +CONFIG_RZ_DMAC=y CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y +CONFIG_RENESAS_OSTM=y CONFIG_SUN8I_DE2_CCU=m CONFIG_SUN50I_IOMMU=y CONFIG_RPMSG_CHAR=y CONFIG_RPMSG_CTRL=y CONFIG_RPMSG_VIRTIO=y CONFIG_ARCH_R9A07G043=y +CONFIG_IIO=y +CONFIG_RZG2L_ADC=m +CONFIG_RESET_RZG2L_USBPHY_CTRL=y CONFIG_PHY_SUN4I_USB=m +CONFIG_PHY_RCAR_GEN3_USB2=y CONFIG_LIBNVDIMM=y CONFIG_NVMEM_SUNXI_SID=y CONFIG_EXT4_FS=y diff --git a/arch/riscv/include/asm/acpi.h b/arch/riscv/include/asm/acpi.h index d5604d2073bc..7dad0cf9d701 100644 --- a/arch/riscv/include/asm/acpi.h +++ b/arch/riscv/include/asm/acpi.h @@ -66,6 +66,8 @@ int acpi_get_riscv_isa(struct acpi_table_header *table, unsigned int cpu, const char **isa); static inline int acpi_numa_get_nid(unsigned int cpu) { return NUMA_NO_NODE; } +void acpi_get_cbo_block_size(struct acpi_table_header *table, u32 *cbom_size, + u32 *cboz_size, u32 *cbop_size); #else static inline void acpi_init_rintc_map(void) { } static inline struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu) @@ -79,6 +81,10 @@ static inline int acpi_get_riscv_isa(struct acpi_table_header *table, return -EINVAL; } +static inline void acpi_get_cbo_block_size(struct acpi_table_header *table, + u32 *cbom_size, u32 *cboz_size, + u32 *cbop_size) { } + #endif /* CONFIG_ACPI */ #endif /*_ASM_ACPI_H*/ diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h index 61ba8ed43d8f..36b955c762ba 100644 --- a/arch/riscv/include/asm/asm-prototypes.h +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -25,7 +25,6 @@ DECLARE_DO_ERROR_INFO(do_trap_ecall_s); DECLARE_DO_ERROR_INFO(do_trap_ecall_m); DECLARE_DO_ERROR_INFO(do_trap_break); -asmlinkage unsigned long get_overflow_stack(void); asmlinkage void handle_bad_stack(struct pt_regs *regs); asmlinkage void do_page_fault(struct pt_regs *regs); asmlinkage void do_irq(struct pt_regs *regs); diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 114bbadaef41..b0487b39e674 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -82,6 +82,47 @@ .endr .endm +#ifdef CONFIG_SMP +#ifdef CONFIG_32BIT +#define PER_CPU_OFFSET_SHIFT 2 +#else +#define PER_CPU_OFFSET_SHIFT 3 +#endif + +.macro asm_per_cpu dst sym tmp + REG_L \tmp, TASK_TI_CPU_NUM(tp) + slli \tmp, \tmp, PER_CPU_OFFSET_SHIFT + la \dst, __per_cpu_offset + add \dst, \dst, \tmp + REG_L \tmp, 0(\dst) + la \dst, \sym + add \dst, \dst, \tmp +.endm +#else /* CONFIG_SMP */ +.macro asm_per_cpu dst sym tmp + la \dst, \sym +.endm +#endif /* CONFIG_SMP */ + +.macro load_per_cpu dst ptr tmp + asm_per_cpu \dst \ptr \tmp + REG_L \dst, 0(\dst) +.endm + +#ifdef CONFIG_SHADOW_CALL_STACK +/* gp is used as the shadow call stack pointer instead */ +.macro load_global_pointer +.endm +#else +/* load __global_pointer 
to gp */ +.macro load_global_pointer +.option push +.option norelax + la gp, __global_pointer$ +.option pop +.endm +#endif /* CONFIG_SHADOW_CALL_STACK */ + /* save all GPs except x1 ~ x5 */ .macro save_from_x6_to_x31 REG_S x6, PT_T1(sp) diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 65f6eee4ab8d..224b4dc02b50 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -15,13 +15,261 @@ #include <asm/barrier.h> #include <asm/bitsperlong.h> +#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(NO_ALTERNATIVE) #include <asm-generic/bitops/__ffs.h> -#include <asm-generic/bitops/ffz.h> -#include <asm-generic/bitops/fls.h> #include <asm-generic/bitops/__fls.h> +#include <asm-generic/bitops/ffs.h> +#include <asm-generic/bitops/fls.h> + +#else +#include <asm/alternative-macros.h> +#include <asm/hwcap.h> + +#if (BITS_PER_LONG == 64) +#define CTZW "ctzw " +#define CLZW "clzw " +#elif (BITS_PER_LONG == 32) +#define CTZW "ctz " +#define CLZW "clz " +#else +#error "Unexpected BITS_PER_LONG" +#endif + +static __always_inline unsigned long variable__ffs(unsigned long word) +{ + int num; + + asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : : : : legacy); + + asm volatile (".option push\n" + ".option arch,+zbb\n" + "ctz %0, %1\n" + ".option pop\n" + : "=r" (word) : "r" (word) :); + + return word; + +legacy: + num = 0; +#if BITS_PER_LONG == 64 + if ((word & 0xffffffff) == 0) { + num += 32; + word >>= 32; + } +#endif + if ((word & 0xffff) == 0) { + num += 16; + word >>= 16; + } + if ((word & 0xff) == 0) { + num += 8; + word >>= 8; + } + if ((word & 0xf) == 0) { + num += 4; + word >>= 4; + } + if ((word & 0x3) == 0) { + num += 2; + word >>= 2; + } + if ((word & 0x1) == 0) + num += 1; + return num; +} + +/** + * __ffs - find first set bit in a long word + * @word: The word to search + * + * Undefined if no set bit exists, so code should check against 0 first. + */ +#define __ffs(word) \ + (__builtin_constant_p(word) ? \ + (unsigned long)__builtin_ctzl(word) : \ + variable__ffs(word)) + +static __always_inline unsigned long variable__fls(unsigned long word) +{ + int num; + + asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : : : : legacy); + + asm volatile (".option push\n" + ".option arch,+zbb\n" + "clz %0, %1\n" + ".option pop\n" + : "=r" (word) : "r" (word) :); + + return BITS_PER_LONG - 1 - word; + +legacy: + num = BITS_PER_LONG - 1; +#if BITS_PER_LONG == 64 + if (!(word & (~0ul << 32))) { + num -= 32; + word <<= 32; + } +#endif + if (!(word & (~0ul << (BITS_PER_LONG - 16)))) { + num -= 16; + word <<= 16; + } + if (!(word & (~0ul << (BITS_PER_LONG - 8)))) { + num -= 8; + word <<= 8; + } + if (!(word & (~0ul << (BITS_PER_LONG - 4)))) { + num -= 4; + word <<= 4; + } + if (!(word & (~0ul << (BITS_PER_LONG - 2)))) { + num -= 2; + word <<= 2; + } + if (!(word & (~0ul << (BITS_PER_LONG - 1)))) + num -= 1; + return num; +} + +/** + * __fls - find last set bit in a long word + * @word: the word to search + * + * Undefined if no set bit exists, so code should check against 0 first. + */ +#define __fls(word) \ + (__builtin_constant_p(word) ? 
\ + (unsigned long)(BITS_PER_LONG - 1 - __builtin_clzl(word)) : \ + variable__fls(word)) + +static __always_inline int variable_ffs(int x) +{ + int r; + + if (!x) + return 0; + + asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : : : : legacy); + + asm volatile (".option push\n" + ".option arch,+zbb\n" + CTZW "%0, %1\n" + ".option pop\n" + : "=r" (r) : "r" (x) :); + + return r + 1; + +legacy: + r = 1; + if (!(x & 0xffff)) { + x >>= 16; + r += 16; + } + if (!(x & 0xff)) { + x >>= 8; + r += 8; + } + if (!(x & 0xf)) { + x >>= 4; + r += 4; + } + if (!(x & 3)) { + x >>= 2; + r += 2; + } + if (!(x & 1)) { + x >>= 1; + r += 1; + } + return r; +} + +/** + * ffs - find first set bit in a word + * @x: the word to search + * + * This is defined the same way as the libc and compiler builtin ffs routines. + * + * ffs(value) returns 0 if value is 0 or the position of the first set bit if + * value is nonzero. The first (least significant) bit is at position 1. + */ +#define ffs(x) (__builtin_constant_p(x) ? __builtin_ffs(x) : variable_ffs(x)) + +static __always_inline int variable_fls(unsigned int x) +{ + int r; + + if (!x) + return 0; + + asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : : : : legacy); + + asm volatile (".option push\n" + ".option arch,+zbb\n" + CLZW "%0, %1\n" + ".option pop\n" + : "=r" (r) : "r" (x) :); + + return 32 - r; + +legacy: + r = 32; + if (!(x & 0xffff0000u)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xff000000u)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xf0000000u)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xc0000000u)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000u)) { + x <<= 1; + r -= 1; + } + return r; +} + +/** + * fls - find last set bit in a word + * @x: the word to search + * + * This is defined in a similar way as ffs, but returns the position of the most + * significant set bit. + * + * fls(value) returns 0 if value is 0 or the position of the last set bit if + * value is nonzero. The last (most significant) bit is at position 32. + */ +#define fls(x) \ +({ \ + typeof(x) x_ = (x); \ + __builtin_constant_p(x_) ? \ + (int)((x_ != 0) ? (32 - __builtin_clz(x_)) : 0) \ + : \ + variable_fls(x_); \ +}) + +#endif /* !defined(CONFIG_RISCV_ISA_ZBB) || defined(NO_ALTERNATIVE) */ + +#include <asm-generic/bitops/ffz.h> #include <asm-generic/bitops/fls64.h> #include <asm-generic/bitops/sched.h> -#include <asm-generic/bitops/ffs.h> #include <asm-generic/bitops/hweight.h> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index d0345bd659c9..a418c3112cd6 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -7,7 +7,10 @@ #define _ASM_CPUFEATURE_H #include <linux/bitmap.h> +#include <linux/jump_label.h> #include <asm/hwcap.h> +#include <asm/alternative-macros.h> +#include <asm/errno.h> /* * These are probed via a device_initcall(), via either the SBI or directly @@ -30,6 +33,104 @@ DECLARE_PER_CPU(long, misaligned_access_speed); /* Per-cpu ISA extensions. 
*/ extern struct riscv_isainfo hart_isa[NR_CPUS]; -void check_unaligned_access(int cpu); +void riscv_user_isa_enable(void); + +#ifdef CONFIG_RISCV_MISALIGNED +bool unaligned_ctl_available(void); +bool check_unaligned_access_emulated(int cpu); +void unaligned_emulation_finish(void); +#else +static inline bool unaligned_ctl_available(void) +{ + return false; +} + +static inline bool check_unaligned_access_emulated(int cpu) +{ + return false; +} + +static inline void unaligned_emulation_finish(void) {} +#endif + +unsigned long riscv_get_elf_hwcap(void); + +struct riscv_isa_ext_data { + const unsigned int id; + const char *name; + const char *property; +}; + +extern const struct riscv_isa_ext_data riscv_isa_ext[]; +extern const size_t riscv_isa_ext_count; +extern bool riscv_isa_fallback; + +unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); + +bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit); +#define riscv_isa_extension_available(isa_bitmap, ext) \ + __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) + +static __always_inline bool +riscv_has_extension_likely(const unsigned long ext) +{ + compiletime_assert(ext < RISCV_ISA_EXT_MAX, + "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + asm_volatile_goto( + ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1) + : + : [ext] "i" (ext) + : + : l_no); + } else { + if (!__riscv_isa_extension_available(NULL, ext)) + goto l_no; + } + + return true; +l_no: + return false; +} + +static __always_inline bool +riscv_has_extension_unlikely(const unsigned long ext) +{ + compiletime_assert(ext < RISCV_ISA_EXT_MAX, + "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + asm_volatile_goto( + ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1) + : + : [ext] "i" (ext) + : + : l_yes); + } else { + if (__riscv_isa_extension_available(NULL, ext)) + goto l_yes; + } + + return false; +l_yes: + return true; +} + +static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext) +{ + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext)) + return true; + + return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); +} + +static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext) +{ + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext)) + return true; + + return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); +} #endif diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h index b3b2dfbdf945..06c236bfab53 100644 --- a/arch/riscv/include/asm/elf.h +++ b/arch/riscv/include/asm/elf.h @@ -14,7 +14,7 @@ #include <asm/auxvec.h> #include <asm/byteorder.h> #include <asm/cacheinfo.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> /* * These are used to set parameters in the core dumps. 
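The bitops.h hunk above routes __ffs/__fls/ffs/fls through Zbb's ctz/clz instructions via ALTERNATIVE() when the extension is available, and keeps the open-coded binary search as the legacy fallback. As a rough illustration of what that fallback computes, here is a minimal userspace model (a sketch only; legacy_fls is a hypothetical name, not a kernel symbol):

#include <assert.h>
#include <stdio.h>

/* Model of the non-Zbb fls path above: halve the search window on each
 * step, starting from the upper 16 bits of a 32-bit word. */
static int legacy_fls(unsigned int x)
{
	int r = 32;

	if (!x)
		return 0;
	if (!(x & 0xffff0000u)) { x <<= 16; r -= 16; }
	if (!(x & 0xff000000u)) { x <<= 8; r -= 8; }
	if (!(x & 0xf0000000u)) { x <<= 4; r -= 4; }
	if (!(x & 0xc0000000u)) { x <<= 2; r -= 2; }
	if (!(x & 0x80000000u)) r -= 1;
	return r;
}

int main(void)
{
	/* Cross-check against the compiler builtin used by the patch's
	 * __builtin_constant_p() fast path. */
	for (unsigned int i = 1; i; i <<= 1)
		assert(legacy_fls(i) == 32 - __builtin_clz(i));
	printf("fls(0x90) = %d\n", legacy_fls(0x90)); /* prints 8 */
	return 0;
}

On Zbb hardware the "j legacy" alternative is patched to a nop, so execution falls through to a single clz (or clzw) instruction and this branch-heavy path is never taken.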
diff --git a/arch/riscv/include/asm/entry-common.h b/arch/riscv/include/asm/entry-common.h index 6e4dee49d84b..7ab5e34318c8 100644 --- a/arch/riscv/include/asm/entry-common.h +++ b/arch/riscv/include/asm/entry-common.h @@ -8,4 +8,18 @@ void handle_page_fault(struct pt_regs *regs); void handle_break(struct pt_regs *regs); +#ifdef CONFIG_RISCV_MISALIGNED +int handle_misaligned_load(struct pt_regs *regs); +int handle_misaligned_store(struct pt_regs *regs); +#else +static inline int handle_misaligned_load(struct pt_regs *regs) +{ + return -1; +} +static inline int handle_misaligned_store(struct pt_regs *regs) +{ + return -1; +} +#endif + #endif /* _ASM_RISCV_ENTRY_COMMON_H */ diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index b55b434f0059..83ed25e43553 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -95,31 +95,31 @@ asm volatile(ALTERNATIVE( \ #endif /* - * dcache.ipa rs1 (invalidate, physical address) + * th.dcache.ipa rs1 (invalidate, physical address) * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | * 0000001 01010 rs1 000 00000 0001011 - * dache.iva rs1 (invalida, virtual address) + * th.dache.iva rs1 (invalida, virtual address) * 0000001 00110 rs1 000 00000 0001011 * - * dcache.cpa rs1 (clean, physical address) + * th.dcache.cpa rs1 (clean, physical address) * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | * 0000001 01001 rs1 000 00000 0001011 - * dcache.cva rs1 (clean, virtual address) + * th.dcache.cva rs1 (clean, virtual address) * 0000001 00101 rs1 000 00000 0001011 * - * dcache.cipa rs1 (clean then invalidate, physical address) + * th.dcache.cipa rs1 (clean then invalidate, physical address) * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | * 0000001 01011 rs1 000 00000 0001011 - * dcache.civa rs1 (... virtual address) + * th.dcache.civa rs1 (... 
virtual address) * 0000001 00111 rs1 000 00000 0001011 * - * sync.s (make sure all cache operations finished) + * th.sync.s (make sure all cache operations finished) * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | * 0000000 11001 00000 000 00000 0001011 */ -#define THEAD_inval_A0 ".long 0x0265000b" -#define THEAD_clean_A0 ".long 0x0255000b" -#define THEAD_flush_A0 ".long 0x0275000b" +#define THEAD_INVAL_A0 ".long 0x0265000b" +#define THEAD_CLEAN_A0 ".long 0x0255000b" +#define THEAD_FLUSH_A0 ".long 0x0275000b" #define THEAD_SYNC_S ".long 0x0190000b" #define ALT_CMO_OP(_op, _start, _size, _cachesize) \ diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 6fc51c1b34cf..06d30526ef3b 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -8,9 +8,6 @@ #ifndef _ASM_RISCV_HWCAP_H #define _ASM_RISCV_HWCAP_H -#include <asm/alternative-macros.h> -#include <asm/errno.h> -#include <linux/bits.h> #include <uapi/asm/hwcap.h> #define RISCV_ISA_EXT_a ('a' - 'a') @@ -69,76 +66,4 @@ #define RISCV_ISA_EXT_SxAIA RISCV_ISA_EXT_SSAIA #endif -#ifndef __ASSEMBLY__ - -#include <linux/jump_label.h> - -unsigned long riscv_get_elf_hwcap(void); - -struct riscv_isa_ext_data { - const unsigned int id; - const char *name; - const char *property; -}; - -extern const struct riscv_isa_ext_data riscv_isa_ext[]; -extern const size_t riscv_isa_ext_count; -extern bool riscv_isa_fallback; - -unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); - -#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext) - -bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit); -#define riscv_isa_extension_available(isa_bitmap, ext) \ - __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) - -static __always_inline bool -riscv_has_extension_likely(const unsigned long ext) -{ - compiletime_assert(ext < RISCV_ISA_EXT_MAX, - "ext must be < RISCV_ISA_EXT_MAX"); - - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { - asm_volatile_goto( - ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1) - : - : [ext] "i" (ext) - : - : l_no); - } else { - if (!__riscv_isa_extension_available(NULL, ext)) - goto l_no; - } - - return true; -l_no: - return false; -} - -static __always_inline bool -riscv_has_extension_unlikely(const unsigned long ext) -{ - compiletime_assert(ext < RISCV_ISA_EXT_MAX, - "ext must be < RISCV_ISA_EXT_MAX"); - - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { - asm_volatile_goto( - ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1) - : - : [ext] "i" (ext) - : - : l_yes); - } else { - if (__riscv_isa_extension_available(NULL, ext)) - goto l_yes; - } - - return false; -l_yes: - return true; -} - -#endif - #endif /* _ASM_RISCV_HWCAP_H */ diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index 78936f4ff513..5c48f48e79a6 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -8,6 +8,11 @@ #include <uapi/asm/hwprobe.h> -#define RISCV_HWPROBE_MAX_KEY 5 +#define RISCV_HWPROBE_MAX_KEY 6 + +static inline bool riscv_hwprobe_key_is_valid(__s64 key) +{ + return key >= 0 && key <= RISCV_HWPROBE_MAX_KEY; +} #endif diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h index 6960beb75f32..e27179b26086 100644 --- a/arch/riscv/include/asm/insn-def.h +++ b/arch/riscv/include/asm/insn-def.h @@ -180,19 +180,19 @@ INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(51), \ __RD(0), RS1(gaddr), RS2(vmid)) -#define CBO_inval(base) \ +#define CBO_INVAL(base) \ 
INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ RS1(base), SIMM12(0)) -#define CBO_clean(base) \ +#define CBO_CLEAN(base) \ INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ RS1(base), SIMM12(1)) -#define CBO_flush(base) \ +#define CBO_FLUSH(base) \ INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ RS1(base), SIMM12(2)) -#define CBO_zero(base) \ +#define CBO_ZERO(base) \ INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ RS1(base), SIMM12(4)) diff --git a/arch/riscv/include/asm/irq_stack.h b/arch/riscv/include/asm/irq_stack.h index e4042d297580..6441ded3b0cf 100644 --- a/arch/riscv/include/asm/irq_stack.h +++ b/arch/riscv/include/asm/irq_stack.h @@ -12,6 +12,9 @@ DECLARE_PER_CPU(ulong *, irq_stack_ptr); +asmlinkage void call_on_irq_stack(struct pt_regs *regs, + void (*func)(struct pt_regs *)); + #ifdef CONFIG_VMAP_STACK /* * To ensure that VMAP'd stack overflow detection works correctly, all VMAP'd diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 5488ecc337b6..57e887bfa34c 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -33,8 +33,8 @@ #define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) #endif /* - * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so - * define the PAGE_OFFSET value for SV39. + * By default, CONFIG_PAGE_OFFSET value corresponds to SV57 address space so + * define the PAGE_OFFSET value for SV48 and SV39. */ #define PAGE_OFFSET_L4 _AC(0xffffaf8000000000, UL) #define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL) diff --git a/arch/riscv/include/asm/pgtable-32.h b/arch/riscv/include/asm/pgtable-32.h index 59ba1fbaf784..00f3369570a8 100644 --- a/arch/riscv/include/asm/pgtable-32.h +++ b/arch/riscv/include/asm/pgtable-32.h @@ -33,4 +33,7 @@ _PAGE_WRITE | _PAGE_EXEC | \ _PAGE_USER | _PAGE_GLOBAL)) +static const __maybe_unused int pgtable_l4_enabled; +static const __maybe_unused int pgtable_l5_enabled; + #endif /* _ASM_RISCV_PGTABLE_32_H */ diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 7a5097202e15..9a2c780a11e9 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -126,14 +126,18 @@ enum napot_cont_order { /* * [63:59] T-Head Memory Type definitions: - * - * 00000 - NC Weakly-ordered, Non-cacheable, Non-bufferable, Non-shareable, Non-trustable + * bit[63] SO - Strong Order + * bit[62] C - Cacheable + * bit[61] B - Bufferable + * bit[60] SH - Shareable + * bit[59] Sec - Trustable + * 00110 - NC Weakly-ordered, Non-cacheable, Bufferable, Shareable, Non-trustable * 01110 - PMA Weakly-ordered, Cacheable, Bufferable, Shareable, Non-trustable - * 10000 - IO Strongly-ordered, Non-cacheable, Non-bufferable, Non-shareable, Non-trustable + * 10010 - IO Strongly-ordered, Non-cacheable, Non-bufferable, Shareable, Non-trustable */ #define _PAGE_PMA_THEAD ((1UL << 62) | (1UL << 61) | (1UL << 60)) -#define _PAGE_NOCACHE_THEAD 0UL -#define _PAGE_IO_THEAD (1UL << 63) +#define _PAGE_NOCACHE_THEAD ((1UL << 61) | (1UL << 60)) +#define _PAGE_IO_THEAD ((1UL << 63) | (1UL << 60)) #define _PAGE_MTMASK_THEAD (_PAGE_PMA_THEAD | _PAGE_IO_THEAD | (1UL << 59)) static inline u64 riscv_page_mtmask(void) diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h index f896708e8331..179bd4afece4 100644 --- a/arch/riscv/include/asm/pgtable-bits.h +++ b/arch/riscv/include/asm/pgtable-bits.h @@ -16,9 +16,9 @@ #define _PAGE_GLOBAL (1 << 5) /* Global */ #define _PAGE_ACCESSED (1 << 6) /* Set by hardware on any access */ #define _PAGE_DIRTY
(1 << 7) /* Set by hardware on any write */ -#define _PAGE_SOFT (1 << 8) /* Reserved for software */ +#define _PAGE_SOFT (3 << 8) /* Reserved for software */ -#define _PAGE_SPECIAL _PAGE_SOFT +#define _PAGE_SPECIAL (1 << 8) /* RSW: 0x1 */ #define _PAGE_TABLE _PAGE_PRESENT /* diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index b2ba3f79cfe9..294044429e8e 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -291,6 +291,7 @@ static inline pte_t pud_pte(pud_t pud) } #ifdef CONFIG_RISCV_ISA_SVNAPOT +#include <asm/cpufeature.h> static __always_inline bool has_svnapot(void) { @@ -811,7 +812,7 @@ extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, * bit 5: _PAGE_PROT_NONE (zero) * bit 6: exclusive marker * bits 7 to 11: swap type - * bits 11 to XLEN-1: swap offset + * bits 12 to XLEN-1: swap offset */ #define __SWP_TYPE_SHIFT 7 #define __SWP_TYPE_BITS 5 @@ -914,7 +915,6 @@ extern uintptr_t _dtb_early_pa; #define dtb_early_pa _dtb_early_pa #endif /* CONFIG_XIP_KERNEL */ extern u64 satp_mode; -extern bool pgtable_l4_enabled; void paging_init(void); void misc_mem_init(void); diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 441da1839c94..f19f861cda54 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -8,6 +8,7 @@ #include <linux/const.h> #include <linux/cache.h> +#include <linux/prctl.h> #include <vdso/processor.h> @@ -82,6 +83,7 @@ struct thread_struct { unsigned long bad_cause; unsigned long vstate_ctrl; struct __riscv_v_ext_state vstate; + unsigned long align_ctl; }; /* Whitelist the fstate from the task_struct for hardened usercopy */ @@ -94,6 +96,7 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, #define INIT_THREAD { \ .sp = sizeof(init_stack) + (long)&init_stack, \ + .align_ctl = PR_UNALIGN_NOPRINT, \ } #define task_pt_regs(tsk) \ @@ -136,6 +139,12 @@ extern long riscv_v_vstate_ctrl_set_current(unsigned long arg); extern long riscv_v_vstate_ctrl_get_current(void); #endif /* CONFIG_RISCV_ISA_V */ +extern int get_unalign_ctl(struct task_struct *tsk, unsigned long addr); +extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val); + +#define GET_UNALIGN_CTL(tsk, addr) get_unalign_ctl((tsk), (addr)) +#define SET_UNALIGN_CTL(tsk, val) set_unalign_ctl((tsk), (val)) + #endif /* __ASSEMBLY__ */ #endif /* _ASM_RISCV_PROCESSOR_H */ diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 12dfda6bb924..0892f4421bc4 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -280,9 +280,6 @@ void sbi_set_timer(uint64_t stime_value); void sbi_shutdown(void); void sbi_send_ipi(unsigned int cpu); int sbi_remote_fence_i(const struct cpumask *cpu_mask); -int sbi_remote_sfence_vma(const struct cpumask *cpu_mask, - unsigned long start, - unsigned long size); int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask, unsigned long start, diff --git a/arch/riscv/include/asm/scs.h b/arch/riscv/include/asm/scs.h new file mode 100644 index 000000000000..0e45db78b24b --- /dev/null +++ b/arch/riscv/include/asm/scs.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SCS_H +#define _ASM_SCS_H + +#ifdef __ASSEMBLY__ +#include <asm/asm-offsets.h> + +#ifdef CONFIG_SHADOW_CALL_STACK + +/* Load init_shadow_call_stack to gp. */ +.macro scs_load_init_stack + la gp, init_shadow_call_stack + XIP_FIXUP_OFFSET gp +.endm + +/* Load the per-CPU IRQ shadow call stack to gp. 
*/ +.macro scs_load_irq_stack tmp + load_per_cpu gp, irq_shadow_call_stack_ptr, \tmp +.endm + +/* Load task_scs_sp(current) to gp. */ +.macro scs_load_current + REG_L gp, TASK_TI_SCS_SP(tp) +.endm + +/* Load task_scs_sp(current) to gp, but only if tp has changed. */ +.macro scs_load_current_if_task_changed prev + beq \prev, tp, _skip_scs + scs_load_current +_skip_scs: +.endm + +/* Save gp to task_scs_sp(current). */ +.macro scs_save_current + REG_S gp, TASK_TI_SCS_SP(tp) +.endm + +#else /* CONFIG_SHADOW_CALL_STACK */ + +.macro scs_load_init_stack +.endm +.macro scs_load_irq_stack tmp +.endm +.macro scs_load_current +.endm +.macro scs_load_current_if_task_changed prev +.endm +.macro scs_save_current +.endm + +#endif /* CONFIG_SHADOW_CALL_STACK */ +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_SCS_H */ diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index a727be723c56..f90d8e42f3c7 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -9,7 +9,7 @@ #include <linux/jump_label.h> #include <linux/sched/task_stack.h> #include <asm/vector.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #include <asm/processor.h> #include <asm/ptrace.h> #include <asm/csr.h> diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 1833beb00489..574779900bfb 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -34,9 +34,6 @@ #ifndef __ASSEMBLY__ -extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)]; -extern unsigned long spin_shadow_stack; - #include <asm/processor.h> #include <asm/csr.h> @@ -60,8 +57,20 @@ struct thread_info { long user_sp; /* User stack pointer */ int cpu; unsigned long syscall_work; /* SYSCALL_WORK_ flags */ +#ifdef CONFIG_SHADOW_CALL_STACK + void *scs_base; + void *scs_sp; +#endif }; +#ifdef CONFIG_SHADOW_CALL_STACK +#define INIT_SCS \ + .scs_base = init_shadow_call_stack, \ + .scs_sp = init_shadow_call_stack, +#else +#define INIT_SCS +#endif + /* * macros/functions for gaining access to the thread information structure * @@ -71,6 +80,7 @@ struct thread_info { { \ .flags = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ + INIT_SCS \ } void arch_release_task_struct(struct task_struct *tsk); diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h index 120bcf2ed8a8..1eb5682b2af6 100644 --- a/arch/riscv/include/asm/tlb.h +++ b/arch/riscv/include/asm/tlb.h @@ -15,7 +15,13 @@ static void tlb_flush(struct mmu_gather *tlb); static inline void tlb_flush(struct mmu_gather *tlb) { - flush_tlb_mm(tlb->mm); +#ifdef CONFIG_MMU + if (tlb->fullmm || tlb->need_flush_all) + flush_tlb_mm(tlb->mm); + else + flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end, + tlb_get_unmap_size(tlb)); +#endif } #endif /* _ASM_RISCV_TLB_H */ diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index a09196f8de68..8f3418c5f172 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -11,6 +11,9 @@ #include <asm/smp.h> #include <asm/errata_list.h> +#define FLUSH_TLB_MAX_SIZE ((unsigned long)-1) +#define FLUSH_TLB_NO_ASID ((unsigned long)-1) + #ifdef CONFIG_MMU extern unsigned long asid_mask; @@ -32,9 +35,12 @@ static inline void local_flush_tlb_page(unsigned long addr) #if defined(CONFIG_SMP) && defined(CONFIG_MMU) void flush_tlb_all(void); void flush_tlb_mm(struct mm_struct *mm); +void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, + unsigned long end, unsigned int 
page_size); void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr); void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +void flush_tlb_kernel_range(unsigned long start, unsigned long end); #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, @@ -51,14 +57,15 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, local_flush_tlb_all(); } -#define flush_tlb_mm(mm) flush_tlb_all() -#endif /* !CONFIG_SMP || !CONFIG_MMU */ - /* Flush a range of kernel pages */ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - flush_tlb_all(); + local_flush_tlb_all(); } +#define flush_tlb_mm(mm) flush_tlb_all() +#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all() +#endif /* !CONFIG_SMP || !CONFIG_MMU */ + #endif /* _ASM_RISCV_TLBFLUSH_H */ diff --git a/arch/riscv/include/asm/vdso/processor.h b/arch/riscv/include/asm/vdso/processor.h index 14f5d27783b8..96b65a5396df 100644 --- a/arch/riscv/include/asm/vdso/processor.h +++ b/arch/riscv/include/asm/vdso/processor.h @@ -14,7 +14,7 @@ static inline void cpu_relax(void) __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy)); #endif -#ifdef __riscv_zihintpause +#ifdef CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE /* * Reduce instruction retirement. * This assumes the PC changes. diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index c5ee07b3df07..87aaef656257 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -15,7 +15,7 @@ #include <linux/sched.h> #include <linux/sched/task_stack.h> #include <asm/ptrace.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #include <asm/csr.h> #include <asm/asm.h> diff --git a/arch/riscv/include/uapi/asm/elf.h b/arch/riscv/include/uapi/asm/elf.h index d696d6610231..11a71b8533d5 100644 --- a/arch/riscv/include/uapi/asm/elf.h +++ b/arch/riscv/include/uapi/asm/elf.h @@ -49,6 +49,7 @@ typedef union __riscv_fp_state elf_fpregset_t; #define R_RISCV_TLS_DTPREL64 9 #define R_RISCV_TLS_TPREL32 10 #define R_RISCV_TLS_TPREL64 11 +#define R_RISCV_IRELATIVE 58 /* Relocation types not used by the dynamic linker */ #define R_RISCV_BRANCH 16 @@ -81,7 +82,6 @@ typedef union __riscv_fp_state elf_fpregset_t; #define R_RISCV_ALIGN 43 #define R_RISCV_RVC_BRANCH 44 #define R_RISCV_RVC_JUMP 45 -#define R_RISCV_LUI 46 #define R_RISCV_GPREL_I 47 #define R_RISCV_GPREL_S 48 #define R_RISCV_TPREL_I 49 @@ -93,6 +93,9 @@ typedef union __riscv_fp_state elf_fpregset_t; #define R_RISCV_SET16 55 #define R_RISCV_SET32 56 #define R_RISCV_32_PCREL 57 +#define R_RISCV_PLT32 59 +#define R_RISCV_SET_ULEB128 60 +#define R_RISCV_SUB_ULEB128 61 #endif /* _UAPI_ASM_RISCV_ELF_H */ diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index d43e306ce2f9..b659ffcfcdb4 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -29,6 +29,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_EXT_ZBA (1 << 3) #define RISCV_HWPROBE_EXT_ZBB (1 << 4) #define RISCV_HWPROBE_EXT_ZBS (1 << 5) +#define RISCV_HWPROBE_EXT_ZICBOZ (1 << 6) #define RISCV_HWPROBE_KEY_CPUPERF_0 5 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) #define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) @@ -36,6 +37,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_MISALIGNED_FAST (3 << 0) #define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0) #define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0) 
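Together with the RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE key defined just below, the RISCV_HWPROBE_EXT_ZICBOZ bit (reported under the existing RISCV_HWPROBE_KEY_IMA_EXT_0 key) lets userspace discover both the presence of Zicboz and its block size. A hedged usage sketch, assuming uapi headers new enough to carry these defines; there is no libc wrapper, so the syscall is invoked directly:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/hwprobe.h>
#include <asm/unistd.h>

int main(void)
{
	struct riscv_hwprobe pairs[2] = {
		{ .key = RISCV_HWPROBE_KEY_IMA_EXT_0 },
		{ .key = RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE },
	};

	/* cpusetsize == 0 with cpus == NULL asks about all online CPUs. */
	if (syscall(__NR_riscv_hwprobe, pairs, 2, 0, NULL, 0))
		return 1;

	if (pairs[0].value & RISCV_HWPROBE_EXT_ZICBOZ)
		printf("Zicboz block size: %llu bytes\n",
		       (unsigned long long)pairs[1].value);
	else
		printf("Zicboz not supported on every CPU\n");
	return 0;
}

Keys the kernel does not know are handed back with the key field set to -1, a value the new riscv_hwprobe_key_is_valid() helper rejects.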
+#define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */ #endif diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 95cf25d48405..fee22a3d1b53 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -57,9 +57,10 @@ obj-y += stacktrace.o obj-y += cacheinfo.o obj-y += patch.o obj-y += probes/ +obj-y += tests/ obj-$(CONFIG_MMU) += vdso.o vdso/ -obj-$(CONFIG_RISCV_M_MODE) += traps_misaligned.o +obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o obj-$(CONFIG_FPU) += fpu.o obj-$(CONFIG_RISCV_ISA_V) += vector.o obj-$(CONFIG_SMP) += smpboot.o diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c index 56cb2c986c48..e619edc8b0cc 100644 --- a/arch/riscv/kernel/acpi.c +++ b/arch/riscv/kernel/acpi.c @@ -14,9 +14,10 @@ */ #include <linux/acpi.h> +#include <linux/efi.h> #include <linux/io.h> +#include <linux/memblock.h> #include <linux/pci.h> -#include <linux/efi.h> int acpi_noirq = 1; /* skip ACPI IRQ initialization */ int acpi_disabled = 1; @@ -217,7 +218,89 @@ void __init __acpi_unmap_table(void __iomem *map, unsigned long size) void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) { - return (void __iomem *)memremap(phys, size, MEMREMAP_WB); + efi_memory_desc_t *md, *region = NULL; + pgprot_t prot; + + if (WARN_ON_ONCE(!efi_enabled(EFI_MEMMAP))) + return NULL; + + for_each_efi_memory_desc(md) { + u64 end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); + + if (phys < md->phys_addr || phys >= end) + continue; + + if (phys + size > end) { + pr_warn(FW_BUG "requested region covers multiple EFI memory regions\n"); + return NULL; + } + region = md; + break; + } + + /* + * It is fine for AML to remap regions that are not represented in the + * EFI memory map at all, as it only describes normal memory, and MMIO + * regions that require a virtual mapping to make them accessible to + * the EFI runtime services. + */ + prot = PAGE_KERNEL_IO; + if (region) { + switch (region->type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: + case EFI_PERSISTENT_MEMORY: + if (memblock_is_map_memory(phys) || + !memblock_is_region_memory(phys, size)) { + pr_warn(FW_BUG "requested region covers kernel memory\n"); + return NULL; + } + + /* + * Mapping kernel memory is permitted if the region in + * question is covered by a single memblock with the + * NOMAP attribute set: this enables the use of ACPI + * table overrides passed via initramfs. + * This particular use case only requires read access. + */ + fallthrough; + + case EFI_RUNTIME_SERVICES_CODE: + /* + * This would be unusual, but not problematic per se, + * as long as we take care not to create a writable + * mapping for executable code. + */ + prot = PAGE_KERNEL_RO; + break; + + case EFI_ACPI_RECLAIM_MEMORY: + /* + * ACPI reclaim memory is used to pass firmware tables + * and other data that is intended for consumption by + * the OS only, which may decide it wants to reclaim + * that memory and use it for something else. We never + * do that, but we usually add it to the linear map + * anyway, in which case we should use the existing + * mapping. 
+ */ + if (memblock_is_map_memory(phys)) + return (void __iomem *)__va(phys); + fallthrough; + + default: + if (region->attribute & EFI_MEMORY_WB) + prot = PAGE_KERNEL; + else if ((region->attribute & EFI_MEMORY_WC) || + (region->attribute & EFI_MEMORY_WT)) + prot = pgprot_writecombine(PAGE_KERNEL); + } + } + + return ioremap_prot(phys, size, pgprot_val(prot)); } #ifdef CONFIG_PCI diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index d6a75aac1d27..a03129f40c46 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -14,6 +14,7 @@ #include <asm/thread_info.h> #include <asm/ptrace.h> #include <asm/cpu_ops_sbi.h> +#include <asm/stacktrace.h> #include <asm/suspend.h> void asm_offsets(void); @@ -38,7 +39,11 @@ void asm_offsets(void) OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count); OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp); OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp); +#ifdef CONFIG_SHADOW_CALL_STACK + OFFSET(TASK_TI_SCS_SP, task_struct, thread_info.scs_sp); +#endif + OFFSET(TASK_TI_CPU_NUM, task_struct, thread_info.cpu); OFFSET(TASK_THREAD_F0, task_struct, thread.fstate.f[0]); OFFSET(TASK_THREAD_F1, task_struct, thread.fstate.f[1]); OFFSET(TASK_THREAD_F2, task_struct, thread.fstate.f[2]); @@ -479,4 +484,8 @@ void asm_offsets(void) OFFSET(KERNEL_MAP_VIRT_ADDR, kernel_mapping, virt_addr); OFFSET(SBI_HART_BOOT_TASK_PTR_OFFSET, sbi_hart_boot_data, task_ptr); OFFSET(SBI_HART_BOOT_STACK_PTR_OFFSET, sbi_hart_boot_data, stack_ptr); + + DEFINE(STACKFRAME_SIZE_ON_STACK, ALIGN(sizeof(struct stackframe), STACK_ALIGN)); + OFFSET(STACKFRAME_FP, stackframe, fp); + OFFSET(STACKFRAME_RA, stackframe, ra); } diff --git a/arch/riscv/kernel/copy-unaligned.S b/arch/riscv/kernel/copy-unaligned.S index cfdecfbaad62..2b3d9398c113 100644 --- a/arch/riscv/kernel/copy-unaligned.S +++ b/arch/riscv/kernel/copy-unaligned.S @@ -9,7 +9,7 @@ /* void __riscv_copy_words_unaligned(void *, const void *, size_t) */ /* Performs a memcpy without aligning buffers, using word loads and stores. */ /* Note: The size is truncated to a multiple of 8 * SZREG */ -ENTRY(__riscv_copy_words_unaligned) +SYM_FUNC_START(__riscv_copy_words_unaligned) andi a4, a2, ~((8*SZREG)-1) beqz a4, 2f add a3, a1, a4 @@ -36,12 +36,12 @@ ENTRY(__riscv_copy_words_unaligned) 2: ret -END(__riscv_copy_words_unaligned) +SYM_FUNC_END(__riscv_copy_words_unaligned) /* void __riscv_copy_bytes_unaligned(void *, const void *, size_t) */ /* Performs a memcpy without aligning buffers, using only byte accesses. 
*/ /* Note: The size is truncated to a multiple of 8 */ -ENTRY(__riscv_copy_bytes_unaligned) +SYM_FUNC_START(__riscv_copy_bytes_unaligned) andi a4, a2, ~(8-1) beqz a4, 2f add a3, a1, a4 @@ -68,4 +68,4 @@ ENTRY(__riscv_copy_bytes_unaligned) 2: ret -END(__riscv_copy_bytes_unaligned) +SYM_FUNC_END(__riscv_copy_bytes_unaligned) diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index c17dacb1141c..d11d6320fb0d 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -125,13 +125,14 @@ old_interface: */ int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid) { - int rc; - for (; node; node = node->parent) { if (of_device_is_compatible(node, "riscv")) { - rc = riscv_of_processor_hartid(node, hartid); - if (!rc) - return 0; + *hartid = (unsigned long)of_get_cpu_hwid(node, 0); + if (*hartid == ~0UL) { + pr_warn("Found CPU without hart ID\n"); + return -ENODEV; + } + return 0; } } @@ -202,9 +203,8 @@ arch_initcall(riscv_cpuinfo_init); #ifdef CONFIG_PROC_FS -static void print_isa(struct seq_file *f) +static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap) { - seq_puts(f, "isa\t\t: "); if (IS_ENABLED(CONFIG_32BIT)) seq_write(f, "rv32", 4); @@ -212,7 +212,7 @@ static void print_isa(struct seq_file *f) seq_write(f, "rv64", 4); for (int i = 0; i < riscv_isa_ext_count; i++) { - if (!__riscv_isa_extension_available(NULL, riscv_isa_ext[i].id)) + if (!__riscv_isa_extension_available(isa_bitmap, riscv_isa_ext[i].id)) continue; /* Only multi-letter extensions are split by underscores */ @@ -276,7 +276,15 @@ static int c_show(struct seq_file *m, void *v) seq_printf(m, "processor\t: %lu\n", cpu_id); seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id)); - print_isa(m); + + /* + * For historical raisins, the isa: line is limited to the lowest common + * denominator of extensions supported across all harts. A true list of + * extensions supported on this hart is printed later in the hart isa: + * line. + */ + seq_puts(m, "isa\t\t: "); + print_isa(m, NULL); print_mmu(m); if (acpi_disabled) { @@ -292,6 +300,13 @@ static int c_show(struct seq_file *m, void *v) seq_printf(m, "mvendorid\t: 0x%lx\n", ci->mvendorid); seq_printf(m, "marchid\t\t: 0x%lx\n", ci->marchid); seq_printf(m, "mimpid\t\t: 0x%lx\n", ci->mimpid); + + /* + * Print the ISA extensions specific to this hart, which may show + * additional extensions not present across all harts. 
+ */ + seq_puts(m, "hart isa\t: "); + print_isa(m, hart_isa[cpu_id].isa); seq_puts(m, "\n"); return 0; diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index e3803822ab5a..b3785ffc1570 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -8,6 +8,7 @@ #include <linux/acpi.h> #include <linux/bitmap.h> +#include <linux/cpuhotplug.h> #include <linux/ctype.h> #include <linux/log2.h> #include <linux/memory.h> @@ -29,6 +30,7 @@ #define MISALIGNED_ACCESS_JIFFIES_LG2 1 #define MISALIGNED_BUFFER_SIZE 0x4000 +#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE) #define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80) unsigned long elf_hwcap __read_mostly; @@ -93,10 +95,10 @@ static bool riscv_isa_extension_check(int id) return true; case RISCV_ISA_EXT_ZICBOZ: if (!riscv_cboz_block_size) { - pr_err("Zicboz detected in ISA string, but no cboz-block-size found\n"); + pr_err("Zicboz detected in ISA string, disabling as no cboz-block-size found\n"); return false; } else if (!is_power_of_2(riscv_cboz_block_size)) { - pr_err("cboz-block-size present, but is not a power-of-2\n"); + pr_err("Zicboz disabled as cboz-block-size present, but is not a power-of-2\n"); return false; } return true; @@ -206,10 +208,11 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc switch (*ext) { case 's': /* - * Workaround for invalid single-letter 's' & 'u'(QEMU). + * Workaround for invalid single-letter 's' & 'u' (QEMU). * No need to set the bit in riscv_isa as 's' & 'u' are - * not valid ISA extensions. It works until multi-letter - * extension starting with "Su" appears. + * not valid ISA extensions. It works unless the first + * multi-letter extension in the ISA string begins with + * "Su" and is not prefixed with an underscore. */ if (ext[-1] != '_' && ext[1] == 'u') { ++isa; @@ -558,23 +561,21 @@ unsigned long riscv_get_elf_hwcap(void) return hwcap; } -void check_unaligned_access(int cpu) +static int check_unaligned_access(void *param) { + int cpu = smp_processor_id(); u64 start_cycles, end_cycles; u64 word_cycles; u64 byte_cycles; int ratio; unsigned long start_jiffies, now; - struct page *page; + struct page *page = param; void *dst; void *src; long speed = RISCV_HWPROBE_MISALIGNED_SLOW; - page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE)); - if (!page) { - pr_warn("Can't alloc pages to measure memcpy performance"); - return; - } + if (check_unaligned_access_emulated(cpu)) + return 0; /* Make an unaligned destination buffer. */ dst = (void *)((unsigned long)page_address(page) | 0x1); @@ -628,7 +629,7 @@ void check_unaligned_access(int cpu) pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n", cpu); - goto out; + return 0; } if (word_cycles < byte_cycles) @@ -642,18 +643,90 @@ void check_unaligned_access(int cpu) (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? 
"fast" : "slow"); per_cpu(misaligned_access_speed, cpu) = speed; + return 0; +} -out: - __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE)); +static void check_unaligned_access_nonboot_cpu(void *param) +{ + unsigned int cpu = smp_processor_id(); + struct page **pages = param; + + if (smp_processor_id() != 0) + check_unaligned_access(pages[cpu]); } -static int check_unaligned_access_boot_cpu(void) +static int riscv_online_cpu(unsigned int cpu) { - check_unaligned_access(0); + static struct page *buf; + + /* We are already set since the last check */ + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) + return 0; + + buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!buf) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return -ENOMEM; + } + + check_unaligned_access(buf); + __free_pages(buf, MISALIGNED_BUFFER_ORDER); return 0; } -arch_initcall(check_unaligned_access_boot_cpu); +/* Measure unaligned access on all CPUs present at boot in parallel. */ +static int check_unaligned_access_all_cpus(void) +{ + unsigned int cpu; + unsigned int cpu_count = num_possible_cpus(); + struct page **bufs = kzalloc(cpu_count * sizeof(struct page *), + GFP_KERNEL); + + if (!bufs) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return 0; + } + + /* + * Allocate separate buffers for each CPU so there's no fighting over + * cache lines. + */ + for_each_cpu(cpu, cpu_online_mask) { + bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!bufs[cpu]) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + goto out; + } + } + + /* Check everybody except 0, who stays behind to tend jiffies. */ + on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1); + + /* Check core 0. */ + smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); + + /* Setup hotplug callback for any new CPUs that come online. */ + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", + riscv_online_cpu, NULL); + +out: + unaligned_emulation_finish(); + for_each_cpu(cpu, cpu_online_mask) { + if (bufs[cpu]) + __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER); + } + + kfree(bufs); + return 0; +} + +arch_initcall(check_unaligned_access_all_cpus); + +void riscv_user_isa_enable(void) +{ + if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ)) + csr_set(CSR_SENVCFG, ENVCFG_CBZE); +} #ifdef CONFIG_RISCV_ALTERNATIVE /* diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 143a2bb3e697..54ca4564a926 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -9,10 +9,15 @@ #include <asm/asm.h> #include <asm/csr.h> +#include <asm/scs.h> #include <asm/unistd.h> +#include <asm/page.h> #include <asm/thread_info.h> #include <asm/asm-offsets.h> #include <asm/errata_list.h> +#include <linux/sizes.h> + + .section .irqentry.text, "ax" SYM_CODE_START(handle_exception) /* @@ -21,9 +26,9 @@ SYM_CODE_START(handle_exception) * register will contain 0, and we should continue on the current TP. 
*/ csrrw tp, CSR_SCRATCH, tp - bnez tp, _save_context + bnez tp, .Lsave_context -_restore_kernel_tpsp: +.Lrestore_kernel_tpsp: csrr tp, CSR_SCRATCH REG_S sp, TASK_TI_KERNEL_SP(tp) @@ -35,7 +40,7 @@ _restore_kernel_tpsp: REG_L sp, TASK_TI_KERNEL_SP(tp) #endif -_save_context: +.Lsave_context: REG_S sp, TASK_TI_USER_SP(tp) REG_L sp, TASK_TI_KERNEL_SP(tp) addi sp, sp, -(PT_SIZE_ON_STACK) @@ -73,10 +78,11 @@ _save_context: csrw CSR_SCRATCH, x0 /* Load the global pointer */ -.option push -.option norelax - la gp, __global_pointer$ -.option pop + load_global_pointer + + /* Load the kernel shadow call stack pointer if coming from userspace */ + scs_load_current_if_task_changed s5 + move a0, sp /* pt_regs */ la ra, ret_from_exception @@ -123,6 +129,9 @@ SYM_CODE_START_NOALIGN(ret_from_exception) addi s0, sp, PT_SIZE_ON_STACK REG_S s0, TASK_TI_KERNEL_SP(tp) + /* Save the kernel shadow call stack pointer */ + scs_save_current + /* * Save TP into the scratch register , so we can find the kernel data * structures again. @@ -170,67 +179,15 @@ SYM_CODE_END(ret_from_exception) #ifdef CONFIG_VMAP_STACK SYM_CODE_START_LOCAL(handle_kernel_stack_overflow) - /* - * Takes the psuedo-spinlock for the shadow stack, in case multiple - * harts are concurrently overflowing their kernel stacks. We could - * store any value here, but since we're overflowing the kernel stack - * already we only have SP to use as a scratch register. So we just - * swap in the address of the spinlock, as that's definately non-zero. - * - * Pairs with a store_release in handle_bad_stack(). - */ -1: la sp, spin_shadow_stack - REG_AMOSWAP_AQ sp, sp, (sp) - bnez sp, 1b - - la sp, shadow_stack - addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE + /* we reach here from kernel context, sscratch must be 0 */ + csrrw x31, CSR_SCRATCH, x31 + asm_per_cpu sp, overflow_stack, x31 + li x31, OVERFLOW_STACK_SIZE + add sp, sp, x31 + /* zero out x31 again and restore x31 */ + xor x31, x31, x31 + csrrw x31, CSR_SCRATCH, x31 - //save caller register to shadow stack - addi sp, sp, -(PT_SIZE_ON_STACK) - REG_S x1, PT_RA(sp) - REG_S x5, PT_T0(sp) - REG_S x6, PT_T1(sp) - REG_S x7, PT_T2(sp) - REG_S x10, PT_A0(sp) - REG_S x11, PT_A1(sp) - REG_S x12, PT_A2(sp) - REG_S x13, PT_A3(sp) - REG_S x14, PT_A4(sp) - REG_S x15, PT_A5(sp) - REG_S x16, PT_A6(sp) - REG_S x17, PT_A7(sp) - REG_S x28, PT_T3(sp) - REG_S x29, PT_T4(sp) - REG_S x30, PT_T5(sp) - REG_S x31, PT_T6(sp) - - la ra, restore_caller_reg - tail get_overflow_stack - -restore_caller_reg: - //save per-cpu overflow stack - REG_S a0, -8(sp) - //restore caller register from shadow_stack - REG_L x1, PT_RA(sp) - REG_L x5, PT_T0(sp) - REG_L x6, PT_T1(sp) - REG_L x7, PT_T2(sp) - REG_L x10, PT_A0(sp) - REG_L x11, PT_A1(sp) - REG_L x12, PT_A2(sp) - REG_L x13, PT_A3(sp) - REG_L x14, PT_A4(sp) - REG_L x15, PT_A5(sp) - REG_L x16, PT_A6(sp) - REG_L x17, PT_A7(sp) - REG_L x28, PT_T3(sp) - REG_L x29, PT_T4(sp) - REG_L x30, PT_T5(sp) - REG_L x31, PT_T6(sp) - - //load per-cpu overflow stack - REG_L sp, -8(sp) addi sp, sp, -(PT_SIZE_ON_STACK) //save context to overflow stack @@ -268,6 +225,43 @@ SYM_CODE_START(ret_from_fork) tail syscall_exit_to_user_mode SYM_CODE_END(ret_from_fork) +#ifdef CONFIG_IRQ_STACKS +/* + * void call_on_irq_stack(struct pt_regs *regs, + * void (*func)(struct pt_regs *)); + * + * Calls func(regs) using the per-CPU IRQ stack. 
+ */ +SYM_FUNC_START(call_on_irq_stack) + /* Create a frame record to save ra and s0 (fp) */ + addi sp, sp, -STACKFRAME_SIZE_ON_STACK + REG_S ra, STACKFRAME_RA(sp) + REG_S s0, STACKFRAME_FP(sp) + addi s0, sp, STACKFRAME_SIZE_ON_STACK + + /* Switch to the per-CPU shadow call stack */ + scs_save_current + scs_load_irq_stack t0 + + /* Switch to the per-CPU IRQ stack and call the handler */ + load_per_cpu t0, irq_stack_ptr, t1 + li t1, IRQ_STACK_SIZE + add sp, t0, t1 + jalr a1 + + /* Switch back to the thread shadow call stack */ + scs_load_current + + /* Switch back to the thread stack and restore ra and s0 */ + addi sp, s0, -STACKFRAME_SIZE_ON_STACK + REG_L ra, STACKFRAME_RA(sp) + REG_L s0, STACKFRAME_FP(sp) + addi sp, sp, STACKFRAME_SIZE_ON_STACK + + ret +SYM_FUNC_END(call_on_irq_stack) +#endif /* CONFIG_IRQ_STACKS */ + /* * Integer register context switch * The callee-saved registers must be saved and restored. @@ -297,6 +291,8 @@ SYM_FUNC_START(__switch_to) REG_S s9, TASK_THREAD_S9_RA(a3) REG_S s10, TASK_THREAD_S10_RA(a3) REG_S s11, TASK_THREAD_S11_RA(a3) + /* Save the kernel shadow call stack pointer */ + scs_save_current /* Restore context from next->thread */ REG_L ra, TASK_THREAD_RA_RA(a4) REG_L sp, TASK_THREAD_SP_RA(a4) @@ -314,6 +310,8 @@ SYM_FUNC_START(__switch_to) REG_L s11, TASK_THREAD_S11_RA(a4) /* The offset of thread_info in task_struct is zero. */ move tp, a1 + /* Switch to the next shadow call stack */ + scs_load_current ret SYM_FUNC_END(__switch_to) @@ -324,7 +322,7 @@ SYM_FUNC_END(__switch_to) .section ".rodata" .align LGREG /* Exception vector table */ -SYM_CODE_START(excp_vect_table) +SYM_DATA_START_LOCAL(excp_vect_table) RISCV_PTR do_trap_insn_misaligned ALT_INSN_FAULT(RISCV_PTR do_trap_insn_fault) RISCV_PTR do_trap_insn_illegal @@ -342,12 +340,11 @@ SYM_CODE_START(excp_vect_table) RISCV_PTR do_page_fault /* load page fault */ RISCV_PTR do_trap_unknown RISCV_PTR do_page_fault /* store page fault */ -excp_vect_table_end: -SYM_CODE_END(excp_vect_table) +SYM_DATA_END_LABEL(excp_vect_table, SYM_L_LOCAL, excp_vect_table_end) #ifndef CONFIG_MMU -SYM_CODE_START(__user_rt_sigreturn) +SYM_DATA_START(__user_rt_sigreturn) li a7, __NR_rt_sigreturn ecall -SYM_CODE_END(__user_rt_sigreturn) +SYM_DATA_END(__user_rt_sigreturn) #endif diff --git a/arch/riscv/kernel/fpu.S b/arch/riscv/kernel/fpu.S index dd2205473de7..2c543f130f93 100644 --- a/arch/riscv/kernel/fpu.S +++ b/arch/riscv/kernel/fpu.S @@ -19,7 +19,7 @@ #include <asm/csr.h> #include <asm/asm-offsets.h> -ENTRY(__fstate_save) +SYM_FUNC_START(__fstate_save) li a2, TASK_THREAD_F0 add a0, a0, a2 li t1, SR_FS @@ -60,9 +60,9 @@ ENTRY(__fstate_save) sw t0, TASK_THREAD_FCSR_F0(a0) csrc CSR_STATUS, t1 ret -ENDPROC(__fstate_save) +SYM_FUNC_END(__fstate_save) -ENTRY(__fstate_restore) +SYM_FUNC_START(__fstate_restore) li a2, TASK_THREAD_F0 add a0, a0, a2 li t1, SR_FS @@ -103,4 +103,125 @@ ENTRY(__fstate_restore) fscsr t0 csrc CSR_STATUS, t1 ret -ENDPROC(__fstate_restore) +SYM_FUNC_END(__fstate_restore) + +#define get_f32(which) fmv.x.s a0, which; j 2f +#define put_f32(which) fmv.s.x which, a1; j 2f +#if __riscv_xlen == 64 +# define get_f64(which) fmv.x.d a0, which; j 2f +# define put_f64(which) fmv.d.x which, a1; j 2f +#else +# define get_f64(which) fsd which, 0(a1); j 2f +# define put_f64(which) fld which, 0(a1); j 2f +#endif + +.macro fp_access_prologue + /* + * Compute jump offset to store the correct FP register since we don't + * have indirect FP register access + */ + sll t0, a0, 3 + la t2, 1f + add t0, t0, t2 + li t1, SR_FS + csrs 
CSR_STATUS, t1 + jr t0 +1: +.endm + +.macro fp_access_epilogue +2: + csrc CSR_STATUS, t1 + ret +.endm + +#define fp_access_body(__access_func) \ + __access_func(f0); \ + __access_func(f1); \ + __access_func(f2); \ + __access_func(f3); \ + __access_func(f4); \ + __access_func(f5); \ + __access_func(f6); \ + __access_func(f7); \ + __access_func(f8); \ + __access_func(f9); \ + __access_func(f10); \ + __access_func(f11); \ + __access_func(f12); \ + __access_func(f13); \ + __access_func(f14); \ + __access_func(f15); \ + __access_func(f16); \ + __access_func(f17); \ + __access_func(f18); \ + __access_func(f19); \ + __access_func(f20); \ + __access_func(f21); \ + __access_func(f22); \ + __access_func(f23); \ + __access_func(f24); \ + __access_func(f25); \ + __access_func(f26); \ + __access_func(f27); \ + __access_func(f28); \ + __access_func(f29); \ + __access_func(f30); \ + __access_func(f31) + + +#ifdef CONFIG_RISCV_MISALIGNED + +/* + * Disable the compressed instruction set to keep a constant offset between FP + * load/store/move instructions + */ +.option norvc +/* + * put_f32_reg - Set a FP register from a register containing the value + * a0 = FP register index to be set + * a1 = value to be loaded in the FP register + */ +SYM_FUNC_START(put_f32_reg) + fp_access_prologue + fp_access_body(put_f32) + fp_access_epilogue +SYM_FUNC_END(put_f32_reg) + +/* + * get_f32_reg - Get a FP register value and return it + * a0 = FP register index to be retrieved + */ +SYM_FUNC_START(get_f32_reg) + fp_access_prologue + fp_access_body(get_f32) + fp_access_epilogue +SYM_FUNC_END(get_f32_reg) + +/* + * put_f64_reg - Set a 64-bit FP register from a value or a pointer. + * a0 = FP register index to be set + * a1 = value/pointer to be loaded in the FP register (when xlen == 32 bits, we + * load the value to a pointer). + */ +SYM_FUNC_START(put_f64_reg) + fp_access_prologue + fp_access_body(put_f64) + fp_access_epilogue +SYM_FUNC_END(put_f64_reg) + +/* + * get_f64_reg - Get a 64-bit FP register value and return it or store it to + * a pointer. + * a0 = FP register index to be retrieved + * a1 = If xlen == 32, pointer which should be loaded with the FP register value, + * or unused if xlen == 64, in which case the FP register value is returned + * through a0 + */ +SYM_FUNC_START(get_f64_reg) + fp_access_prologue + fp_access_body(get_f64) + fp_access_epilogue +SYM_FUNC_END(get_f64_reg) + +#endif /* CONFIG_RISCV_MISALIGNED */ diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 3710ea5d160f..b77397432403 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -14,11 +14,12 @@ #include <asm/cpu_ops_sbi.h> #include <asm/hwcap.h> #include <asm/image.h> +#include <asm/scs.h> #include <asm/xip_fixup.h> #include "efi-header.S" __HEAD -ENTRY(_start) +SYM_CODE_START(_start) /* * Image header expected by Linux boot-loaders. The image header data * structure is described in asm/image.h. */ @@ -110,10 +111,7 @@ relocate_enable_mmu: csrw CSR_TVEC, a0 /* Reload the global pointer */ -.option push -.option norelax - la gp, __global_pointer$ -.option pop + load_global_pointer /* * Switch to kernel page tables.
A full fence is necessary in order to @@ -134,10 +132,7 @@ secondary_start_sbi: csrw CSR_IP, zero /* Load the global pointer */ - .option push - .option norelax - la gp, __global_pointer$ - .option pop + load_global_pointer /* * Disable FPU & VECTOR to detect illegal usage of @@ -159,6 +154,7 @@ secondary_start_sbi: XIP_FIXUP_OFFSET a3 add a3, a3, a1 REG_L sp, (a3) + scs_load_current .Lsecondary_start_common: @@ -168,12 +164,12 @@ secondary_start_sbi: XIP_FIXUP_OFFSET a0 call relocate_enable_mmu #endif - call setup_trap_vector + call .Lsetup_trap_vector tail smp_callin #endif /* CONFIG_SMP */ .align 2 -setup_trap_vector: +.Lsetup_trap_vector: /* Set trap vector to exception handler */ la a0, handle_exception csrw CSR_TVEC, a0 @@ -191,9 +187,9 @@ setup_trap_vector: wfi j .Lsecondary_park -END(_start) +SYM_CODE_END(_start) -ENTRY(_start_kernel) +SYM_CODE_START(_start_kernel) /* Mask all interrupts */ csrw CSR_IE, zero csrw CSR_IP, zero @@ -210,7 +206,7 @@ ENTRY(_start_kernel) * not implement PMPs, so we set up a quick trap handler to just skip * touching the PMPs on any trap. */ - la a0, pmp_done + la a0, .Lpmp_done csrw CSR_TVEC, a0 li a0, -1 @@ -218,7 +214,7 @@ ENTRY(_start_kernel) li a0, (PMP_A_NAPOT | PMP_R | PMP_W | PMP_X) csrw CSR_PMPCFG0, a0 .align 2 -pmp_done: +.Lpmp_done: /* * The hartid in a0 is expected later on, and we have no firmware @@ -228,10 +224,7 @@ pmp_done: #endif /* CONFIG_RISCV_M_MODE */ /* Load the global pointer */ -.option push -.option norelax - la gp, __global_pointer$ -.option pop + load_global_pointer /* * Disable FPU & VECTOR to detect illegal usage of @@ -282,12 +275,12 @@ pmp_done: /* Clear BSS for flat non-ELF images */ la a3, __bss_start la a4, __bss_stop - ble a4, a3, clear_bss_done -clear_bss: + ble a4, a3, .Lclear_bss_done +.Lclear_bss: REG_S zero, (a3) add a3, a3, RISCV_SZPTR - blt a3, a4, clear_bss -clear_bss_done: + blt a3, a4, .Lclear_bss +.Lclear_bss_done: #endif la a2, boot_cpu_hartid XIP_FIXUP_OFFSET a2 @@ -298,6 +291,7 @@ clear_bss_done: la sp, init_thread_union + THREAD_SIZE XIP_FIXUP_OFFSET sp addi sp, sp, -PT_SIZE_ON_STACK + scs_load_init_stack #ifdef CONFIG_BUILTIN_DTB la a0, __dtb_start XIP_FIXUP_OFFSET a0 @@ -311,11 +305,12 @@ clear_bss_done: call relocate_enable_mmu #endif /* CONFIG_MMU */ - call setup_trap_vector + call .Lsetup_trap_vector /* Restore C environment */ la tp, init_task la sp, init_thread_union + THREAD_SIZE addi sp, sp, -PT_SIZE_ON_STACK + scs_load_current #ifdef CONFIG_KASAN call kasan_early_init @@ -353,10 +348,10 @@ clear_bss_done: tail .Lsecondary_start_common #endif /* CONFIG_RISCV_BOOT_SPINWAIT */ -END(_start_kernel) +SYM_CODE_END(_start_kernel) #ifdef CONFIG_RISCV_M_MODE -ENTRY(reset_regs) +SYM_CODE_START_LOCAL(reset_regs) li sp, 0 li gp, 0 li tp, 0 @@ -454,5 +449,5 @@ ENTRY(reset_regs) .Lreset_regs_done_vector: #endif /* CONFIG_RISCV_ISA_V */ ret -END(reset_regs) +SYM_CODE_END(reset_regs) #endif /* CONFIG_RISCV_M_MODE */ diff --git a/arch/riscv/kernel/hibernate-asm.S b/arch/riscv/kernel/hibernate-asm.S index d698dd7df637..d040dcf4add4 100644 --- a/arch/riscv/kernel/hibernate-asm.S +++ b/arch/riscv/kernel/hibernate-asm.S @@ -21,7 +21,7 @@ * * Always returns 0 */ -ENTRY(__hibernate_cpu_resume) +SYM_FUNC_START(__hibernate_cpu_resume) /* switch to hibernated image's page table. */ csrw CSR_SATP, s0 sfence.vma @@ -34,7 +34,7 @@ ENTRY(__hibernate_cpu_resume) mv a0, zero ret -END(__hibernate_cpu_resume) +SYM_FUNC_END(__hibernate_cpu_resume) /* * Prepare to restore the image. 
@@ -42,7 +42,7 @@ END(__hibernate_cpu_resume) * a1: satp of temporary page tables. * a2: cpu_resume. */ -ENTRY(hibernate_restore_image) +SYM_FUNC_START(hibernate_restore_image) mv s0, a0 mv s1, a1 mv s2, a2 @@ -50,7 +50,7 @@ ENTRY(hibernate_restore_image) REG_L a1, relocated_restore_code jr a1 -END(hibernate_restore_image) +SYM_FUNC_END(hibernate_restore_image) /* * The below code will be executed from a 'safe' page. @@ -58,7 +58,7 @@ END(hibernate_restore_image) * back to the original memory location. Finally, it jumps to __hibernate_cpu_resume() * to restore the CPU context. */ -ENTRY(hibernate_core_restore_code) +SYM_FUNC_START(hibernate_core_restore_code) /* switch to temp page table. */ csrw satp, s1 sfence.vma @@ -73,4 +73,4 @@ ENTRY(hibernate_core_restore_code) bnez s4, .Lcopy jr s2 -END(hibernate_core_restore_code) +SYM_FUNC_END(hibernate_core_restore_code) diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c index 9cc0a7669271..9ceda02507ca 100644 --- a/arch/riscv/kernel/irq.c +++ b/arch/riscv/kernel/irq.c @@ -9,6 +9,7 @@ #include <linux/irqchip.h> #include <linux/irqdomain.h> #include <linux/module.h> +#include <linux/scs.h> #include <linux/seq_file.h> #include <asm/sbi.h> #include <asm/smp.h> @@ -34,6 +35,24 @@ EXPORT_SYMBOL_GPL(riscv_get_intc_hwnode); #ifdef CONFIG_IRQ_STACKS #include <asm/irq_stack.h> +DECLARE_PER_CPU(ulong *, irq_shadow_call_stack_ptr); + +#ifdef CONFIG_SHADOW_CALL_STACK +DEFINE_PER_CPU(ulong *, irq_shadow_call_stack_ptr); +#endif + +static void init_irq_scs(void) +{ + int cpu; + + if (!scs_is_enabled()) + return; + + for_each_possible_cpu(cpu) + per_cpu(irq_shadow_call_stack_ptr, cpu) = + scs_alloc(cpu_to_node(cpu)); +} + DEFINE_PER_CPU(ulong *, irq_stack_ptr); #ifdef CONFIG_VMAP_STACK @@ -61,40 +80,22 @@ static void init_irq_stacks(void) #endif /* CONFIG_VMAP_STACK */ #ifdef CONFIG_SOFTIRQ_ON_OWN_STACK +static void ___do_softirq(struct pt_regs *regs) +{ + __do_softirq(); +} + void do_softirq_own_stack(void) { -#ifdef CONFIG_IRQ_STACKS - if (on_thread_stack()) { - ulong *sp = per_cpu(irq_stack_ptr, smp_processor_id()) - + IRQ_STACK_SIZE/sizeof(ulong); - __asm__ __volatile( - "addi sp, sp, -"RISCV_SZPTR "\n" - REG_S" ra, (sp) \n" - "addi sp, sp, -"RISCV_SZPTR "\n" - REG_S" s0, (sp) \n" - "addi s0, sp, 2*"RISCV_SZPTR "\n" - "move sp, %[sp] \n" - "call __do_softirq \n" - "addi sp, s0, -2*"RISCV_SZPTR"\n" - REG_L" s0, (sp) \n" - "addi sp, sp, "RISCV_SZPTR "\n" - REG_L" ra, (sp) \n" - "addi sp, sp, "RISCV_SZPTR "\n" - : - : [sp] "r" (sp) - : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", - "t0", "t1", "t2", "t3", "t4", "t5", "t6", -#ifndef CONFIG_FRAME_POINTER - "s0", -#endif - "memory"); - } else -#endif + if (on_thread_stack()) + call_on_irq_stack(NULL, ___do_softirq); + else __do_softirq(); } #endif /* CONFIG_SOFTIRQ_ON_OWN_STACK */ #else +static void init_irq_scs(void) {} static void init_irq_stacks(void) {} #endif /* CONFIG_IRQ_STACKS */ @@ -106,6 +107,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) void __init init_IRQ(void) { + init_irq_scs(); init_irq_stacks(); irqchip_init(); if (!handle_arch_irq) diff --git a/arch/riscv/kernel/kexec_relocate.S b/arch/riscv/kernel/kexec_relocate.S index 059c5e216ae7..de0a4b35d01e 100644 --- a/arch/riscv/kernel/kexec_relocate.S +++ b/arch/riscv/kernel/kexec_relocate.S @@ -17,27 +17,17 @@ SYM_CODE_START(riscv_kexec_relocate) * s1: (const) Phys address to jump to after relocation * s2: (const) Phys address of the FDT image * s3: (const) The hartid of the current hart - * s4: Pointer to the destination 
address for the relocation - * s5: (const) Number of words per page - * s6: (const) 1, used for subtraction - * s7: (const) kernel_map.va_pa_offset, used when switching MMU off - * s8: (const) Physical address of the main loop - * s9: (debug) indirection page counter - * s10: (debug) entry counter - * s11: (debug) copied words counter + * s4: (const) kernel_map.va_pa_offset, used when switching MMU off + * s5: Pointer to the destination address for the relocation + * s6: (const) Physical address of the main loop */ mv s0, a0 mv s1, a1 mv s2, a2 mv s3, a3 - mv s4, zero - li s5, (PAGE_SIZE / RISCV_SZPTR) - li s6, 1 - mv s7, a4 - mv s8, zero - mv s9, zero - mv s10, zero - mv s11, zero + mv s4, a4 + mv s5, zero + mv s6, zero /* Disable / cleanup interrupts */ csrw CSR_SIE, zero @@ -52,21 +42,27 @@ SYM_CODE_START(riscv_kexec_relocate) * the start of the loop below so that we jump there in * any case. */ - la s8, 1f - sub s8, s8, s7 - csrw CSR_STVEC, s8 + la s6, 1f + sub s6, s6, s4 + csrw CSR_STVEC, s6 + + /* + * With C-extension, here we get 42 Bytes and the next + * .align directive would pad zeros here up to 44 Bytes. + * So manually put a nop here to avoid zeros padding. + */ + nop /* Process entries in a loop */ .align 2 1: - addi s10, s10, 1 REG_L t0, 0(s0) /* t0 = *image->entry */ addi s0, s0, RISCV_SZPTR /* image->entry++ */ /* IND_DESTINATION entry ? -> save destination address */ andi t1, t0, 0x1 beqz t1, 2f - andi s4, t0, ~0x1 + andi s5, t0, ~0x1 j 1b 2: @@ -74,9 +70,8 @@ SYM_CODE_START(riscv_kexec_relocate) andi t1, t0, 0x2 beqz t1, 2f andi s0, t0, ~0x2 - addi s9, s9, 1 csrw CSR_SATP, zero - jalr zero, s8, 0 + jr s6 2: /* IND_DONE entry ? -> jump to done label */ @@ -92,14 +87,13 @@ SYM_CODE_START(riscv_kexec_relocate) andi t1, t0, 0x8 beqz t1, 1b /* Unknown entry type, ignore it */ andi t0, t0, ~0x8 - mv t3, s5 /* i = num words per page */ + li t3, (PAGE_SIZE / RISCV_SZPTR) /* i = num words per page */ 3: /* copy loop */ REG_L t1, (t0) /* t1 = *src_ptr */ - REG_S t1, (s4) /* *dst_ptr = *src_ptr */ + REG_S t1, (s5) /* *dst_ptr = *src_ptr */ addi t0, t0, RISCV_SZPTR /* stc_ptr++ */ - addi s4, s4, RISCV_SZPTR /* dst_ptr++ */ - sub t3, t3, s6 /* i-- */ - addi s11, s11, 1 /* c++ */ + addi s5, s5, RISCV_SZPTR /* dst_ptr++ */ + addi t3, t3, -0x1 /* i-- */ beqz t3, 1b /* copy done ? 
*/ j 3b @@ -146,7 +140,7 @@ SYM_CODE_START(riscv_kexec_relocate) */ fence.i - jalr zero, a2, 0 + jr a2 SYM_CODE_END(riscv_kexec_relocate) riscv_kexec_relocate_end: diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index 669b8697aa38..58dd96a2a153 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -82,7 +82,7 @@ .endm #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ -ENTRY(ftrace_caller) +SYM_FUNC_START(ftrace_caller) SAVE_ABI addi a0, t0, -FENTRY_RA_OFFSET @@ -91,8 +91,7 @@ ENTRY(ftrace_caller) mv a1, ra mv a3, sp -ftrace_call: - .global ftrace_call +SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) call ftrace_stub #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -102,16 +101,15 @@ ftrace_call: #ifdef HAVE_FUNCTION_GRAPH_FP_TEST mv a2, s0 #endif -ftrace_graph_call: - .global ftrace_graph_call +SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) call ftrace_stub #endif RESTORE_ABI jr t0 -ENDPROC(ftrace_caller) +SYM_FUNC_END(ftrace_caller) #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS -ENTRY(ftrace_regs_caller) +SYM_FUNC_START(ftrace_regs_caller) SAVE_ALL addi a0, t0, -FENTRY_RA_OFFSET @@ -120,8 +118,7 @@ ENTRY(ftrace_regs_caller) mv a1, ra mv a3, sp -ftrace_regs_call: - .global ftrace_regs_call +SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) call ftrace_stub #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -131,12 +128,11 @@ ftrace_regs_call: #ifdef HAVE_FUNCTION_GRAPH_FP_TEST mv a2, s0 #endif -ftrace_graph_regs_call: - .global ftrace_graph_regs_call +SYM_INNER_LABEL(ftrace_graph_regs_call, SYM_L_GLOBAL) call ftrace_stub #endif RESTORE_ALL jr t0 -ENDPROC(ftrace_regs_caller) +SYM_FUNC_END(ftrace_regs_caller) #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index 8818a8fa9ff3..b4dd9ed6849e 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -61,7 +61,7 @@ SYM_TYPED_FUNC_START(ftrace_stub_graph) ret SYM_FUNC_END(ftrace_stub_graph) -ENTRY(return_to_handler) +SYM_FUNC_START(return_to_handler) /* * On implementing the frame pointer test, the ideal way is to compare the * s0 (frame pointer, if enabled) on entry and the sp (stack pointer) on return.
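The conversions in these assembly files all follow one pattern: ENTRY()/END()/ENDPROC() are the deprecated linkage.h annotations, while SYM_FUNC_START()/SYM_FUNC_END() are the generic replacements, which among other things record symbol type and size in the object file. A minimal sketch of the new form for a C-callable leaf routine (the routine name here is illustrative, not part of the patch):

	#include <linux/linkage.h>

	/* was: ENTRY(demo_leaf) */
	SYM_FUNC_START(demo_leaf)
		mv	a0, a1		/* ordinary C-ABI function body */
		ret
	/* was: ENDPROC(demo_leaf) */
	SYM_FUNC_END(demo_leaf)

Code that does not follow the C calling convention, such as riscv_kexec_relocate above and arch_rethook_trampoline later in the series, uses the SYM_CODE_START()/SYM_CODE_END() variants instead.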
@@ -76,25 +76,25 @@ ENTRY(return_to_handler) mv a2, a0 RESTORE_RET_ABI_STATE jalr a2 -ENDPROC(return_to_handler) +SYM_FUNC_END(return_to_handler) #endif #ifndef CONFIG_DYNAMIC_FTRACE -ENTRY(MCOUNT_NAME) +SYM_FUNC_START(MCOUNT_NAME) la t4, ftrace_stub #ifdef CONFIG_FUNCTION_GRAPH_TRACER la t0, ftrace_graph_return REG_L t1, 0(t0) - bne t1, t4, do_ftrace_graph_caller + bne t1, t4, .Ldo_ftrace_graph_caller la t3, ftrace_graph_entry REG_L t2, 0(t3) la t6, ftrace_graph_entry_stub - bne t2, t6, do_ftrace_graph_caller + bne t2, t6, .Ldo_ftrace_graph_caller #endif la t3, ftrace_trace_function REG_L t5, 0(t3) - bne t5, t4, do_trace + bne t5, t4, .Ldo_trace ret #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -102,7 +102,7 @@ ENTRY(MCOUNT_NAME) * A pseudo representation for the function graph tracer: * prepare_to_return(&ra_to_caller_of_caller, ra_to_caller) */ -do_ftrace_graph_caller: +.Ldo_ftrace_graph_caller: addi a0, s0, -SZREG mv a1, ra #ifdef HAVE_FUNCTION_GRAPH_FP_TEST @@ -118,7 +118,7 @@ do_ftrace_graph_caller: * A pseudo representation for the function tracer: * (*ftrace_trace_function)(ra_to_caller, ra_to_caller_of_caller) */ -do_trace: +.Ldo_trace: REG_L a1, -SZREG(s0) mv a0, ra @@ -126,6 +126,6 @@ do_trace: jalr t5 RESTORE_ABI_STATE ret -ENDPROC(MCOUNT_NAME) +SYM_FUNC_END(MCOUNT_NAME) #endif EXPORT_SYMBOL(MCOUNT_NAME) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 7c651d55fcbd..56a8c78e9e21 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -7,6 +7,9 @@ #include <linux/elf.h> #include <linux/err.h> #include <linux/errno.h> +#include <linux/hashtable.h> +#include <linux/kernel.h> +#include <linux/log2.h> #include <linux/moduleloader.h> #include <linux/vmalloc.h> #include <linux/sizes.h> @@ -14,6 +17,38 @@ #include <asm/alternative.h> #include <asm/sections.h> +struct used_bucket { + struct list_head head; + struct hlist_head *bucket; +}; + +struct relocation_head { + struct hlist_node node; + struct list_head *rel_entry; + void *location; +}; + +struct relocation_entry { + struct list_head head; + Elf_Addr value; + unsigned int type; +}; + +struct relocation_handlers { + int (*reloc_handler)(struct module *me, void *location, Elf_Addr v); + int (*accumulate_handler)(struct module *me, void *location, + long buffer); +}; + +unsigned int initialize_relocation_hashtable(unsigned int num_relocations); +void process_accumulated_relocations(struct module *me); +int add_relocation_to_accumulate(struct module *me, int type, void *location, + unsigned int hashtable_bits, Elf_Addr v); + +struct hlist_head *relocation_hashtable; + +struct list_head used_buckets_list; + /* * The auipc+jalr instruction pair can reach any PC-relative offset * in the range [-2^31 - 2^11, 2^31 - 2^11) @@ -27,68 +62,90 @@ static bool riscv_insn_valid_32bit_offset(ptrdiff_t val) #endif } -static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v) +static int riscv_insn_rmw(void *location, u32 keep, u32 set) +{ + u16 *parcel = location; + u32 insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16; + + insn &= keep; + insn |= set; + + parcel[0] = cpu_to_le16(insn); + parcel[1] = cpu_to_le16(insn >> 16); + return 0; +} + +static int riscv_insn_rvc_rmw(void *location, u16 keep, u16 set) +{ + u16 *parcel = location; + u16 insn = le16_to_cpu(*parcel); + + insn &= keep; + insn |= set; + + *parcel = cpu_to_le16(insn); + return 0; +} + +static int apply_r_riscv_32_rela(struct module *me, void *location, Elf_Addr v) { if (v != (u32)v) { pr_err("%s: value 
%016llx out of range for 32-bit field\n", me->name, (long long)v); return -EINVAL; } - *location = v; + *(u32 *)location = v; return 0; } -static int apply_r_riscv_64_rela(struct module *me, u32 *location, Elf_Addr v) +static int apply_r_riscv_64_rela(struct module *me, void *location, Elf_Addr v) { *(u64 *)location = v; return 0; } -static int apply_r_riscv_branch_rela(struct module *me, u32 *location, +static int apply_r_riscv_branch_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - location; u32 imm12 = (offset & 0x1000) << (31 - 12); u32 imm11 = (offset & 0x800) >> (11 - 7); u32 imm10_5 = (offset & 0x7e0) << (30 - 10); u32 imm4_1 = (offset & 0x1e) << (11 - 4); - *location = (*location & 0x1fff07f) | imm12 | imm11 | imm10_5 | imm4_1; - return 0; + return riscv_insn_rmw(location, 0x1fff07f, imm12 | imm11 | imm10_5 | imm4_1); } -static int apply_r_riscv_jal_rela(struct module *me, u32 *location, +static int apply_r_riscv_jal_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - location; u32 imm20 = (offset & 0x100000) << (31 - 20); u32 imm19_12 = (offset & 0xff000); u32 imm11 = (offset & 0x800) << (20 - 11); u32 imm10_1 = (offset & 0x7fe) << (30 - 10); - *location = (*location & 0xfff) | imm20 | imm19_12 | imm11 | imm10_1; - return 0; + return riscv_insn_rmw(location, 0xfff, imm20 | imm19_12 | imm11 | imm10_1); } -static int apply_r_riscv_rvc_branch_rela(struct module *me, u32 *location, +static int apply_r_riscv_rvc_branch_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - location; u16 imm8 = (offset & 0x100) << (12 - 8); u16 imm7_6 = (offset & 0xc0) >> (6 - 5); u16 imm5 = (offset & 0x20) >> (5 - 2); u16 imm4_3 = (offset & 0x18) << (12 - 5); u16 imm2_1 = (offset & 0x6) << (12 - 10); - *(u16 *)location = (*(u16 *)location & 0xe383) | - imm8 | imm7_6 | imm5 | imm4_3 | imm2_1; - return 0; + return riscv_insn_rvc_rmw(location, 0xe383, + imm8 | imm7_6 | imm5 | imm4_3 | imm2_1); } -static int apply_r_riscv_rvc_jump_rela(struct module *me, u32 *location, +static int apply_r_riscv_rvc_jump_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - location; u16 imm11 = (offset & 0x800) << (12 - 11); u16 imm10 = (offset & 0x400) >> (10 - 8); u16 imm9_8 = (offset & 0x300) << (12 - 11); @@ -98,16 +155,14 @@ static int apply_r_riscv_rvc_jump_rela(struct module *me, u32 *location, u16 imm4 = (offset & 0x10) << (12 - 5); u16 imm3_1 = (offset & 0xe) << (12 - 10); - *(u16 *)location = (*(u16 *)location & 0xe003) | - imm11 | imm10 | imm9_8 | imm7 | imm6 | imm5 | imm4 | imm3_1; - return 0; + return riscv_insn_rvc_rmw(location, 0xe003, + imm11 | imm10 | imm9_8 | imm7 | imm6 | imm5 | imm4 | imm3_1); } -static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, +static int apply_r_riscv_pcrel_hi20_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; - s32 hi20; + ptrdiff_t offset = (void *)v - location; if (!riscv_insn_valid_32bit_offset(offset)) { pr_err( @@ -116,23 +171,20 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, return -EINVAL; } - hi20 = (offset + 0x800) & 0xfffff000; - *location = (*location & 0xfff) | hi20; - return 0; + return riscv_insn_rmw(location, 0xfff, 
(offset + 0x800) & 0xfffff000); } -static int apply_r_riscv_pcrel_lo12_i_rela(struct module *me, u32 *location, +static int apply_r_riscv_pcrel_lo12_i_rela(struct module *me, void *location, Elf_Addr v) { /* * v is the lo12 value to fill. It is calculated before calling this * handler. */ - *location = (*location & 0xfffff) | ((v & 0xfff) << 20); - return 0; + return riscv_insn_rmw(location, 0xfffff, (v & 0xfff) << 20); } -static int apply_r_riscv_pcrel_lo12_s_rela(struct module *me, u32 *location, +static int apply_r_riscv_pcrel_lo12_s_rela(struct module *me, void *location, Elf_Addr v) { /* @@ -142,15 +194,12 @@ static int apply_r_riscv_pcrel_lo12_s_rela(struct module *me, u32 *location, u32 imm11_5 = (v & 0xfe0) << (31 - 11); u32 imm4_0 = (v & 0x1f) << (11 - 4); - *location = (*location & 0x1fff07f) | imm11_5 | imm4_0; - return 0; + return riscv_insn_rmw(location, 0x1fff07f, imm11_5 | imm4_0); } -static int apply_r_riscv_hi20_rela(struct module *me, u32 *location, +static int apply_r_riscv_hi20_rela(struct module *me, void *location, Elf_Addr v) { - s32 hi20; - if (IS_ENABLED(CONFIG_CMODEL_MEDLOW)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", @@ -158,22 +207,20 @@ static int apply_r_riscv_hi20_rela(struct module *me, u32 *location, return -EINVAL; } - hi20 = ((s32)v + 0x800) & 0xfffff000; - *location = (*location & 0xfff) | hi20; - return 0; + return riscv_insn_rmw(location, 0xfff, ((s32)v + 0x800) & 0xfffff000); } -static int apply_r_riscv_lo12_i_rela(struct module *me, u32 *location, +static int apply_r_riscv_lo12_i_rela(struct module *me, void *location, Elf_Addr v) { /* Skip medlow checking because of filtering by HI20 already */ s32 hi20 = ((s32)v + 0x800) & 0xfffff000; s32 lo12 = ((s32)v - hi20); - *location = (*location & 0xfffff) | ((lo12 & 0xfff) << 20); - return 0; + + return riscv_insn_rmw(location, 0xfffff, (lo12 & 0xfff) << 20); } -static int apply_r_riscv_lo12_s_rela(struct module *me, u32 *location, +static int apply_r_riscv_lo12_s_rela(struct module *me, void *location, Elf_Addr v) { /* Skip medlow checking because of filtering by HI20 already */ @@ -181,20 +228,18 @@ static int apply_r_riscv_lo12_s_rela(struct module *me, u32 *location, s32 lo12 = ((s32)v - hi20); u32 imm11_5 = (lo12 & 0xfe0) << (31 - 11); u32 imm4_0 = (lo12 & 0x1f) << (11 - 4); - *location = (*location & 0x1fff07f) | imm11_5 | imm4_0; - return 0; + + return riscv_insn_rmw(location, 0x1fff07f, imm11_5 | imm4_0); } -static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location, +static int apply_r_riscv_got_hi20_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; - s32 hi20; + ptrdiff_t offset = (void *)v - location; /* Always emit the got entry */ if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { - offset = module_emit_got_entry(me, v); - offset = (void *)offset - (void *)location; + offset = (void *)module_emit_got_entry(me, v) - location; } else { pr_err( "%s: can not generate the GOT entry for symbol = %016llx from PC = %p\n", @@ -202,22 +247,19 @@ static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location, return -EINVAL; } - hi20 = (offset + 0x800) & 0xfffff000; - *location = (*location & 0xfff) | hi20; - return 0; + return riscv_insn_rmw(location, 0xfff, (offset + 0x800) & 0xfffff000); } -static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, +static int apply_r_riscv_call_plt_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void 
*)location; + ptrdiff_t offset = (void *)v - location; u32 hi20, lo12; if (!riscv_insn_valid_32bit_offset(offset)) { /* Only emit the plt entry if offset over 32-bit range */ if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { - offset = module_emit_plt_entry(me, v); - offset = (void *)offset - (void *)location; + offset = (void *)module_emit_plt_entry(me, v) - location; } else { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", @@ -228,15 +270,14 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, hi20 = (offset + 0x800) & 0xfffff000; lo12 = (offset - hi20) & 0xfff; - *location = (*location & 0xfff) | hi20; - *(location + 1) = (*(location + 1) & 0xfffff) | (lo12 << 20); - return 0; + riscv_insn_rmw(location, 0xfff, hi20); + return riscv_insn_rmw(location + 4, 0xfffff, lo12 << 20); } -static int apply_r_riscv_call_rela(struct module *me, u32 *location, +static int apply_r_riscv_call_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - location; u32 hi20, lo12; if (!riscv_insn_valid_32bit_offset(offset)) { @@ -248,18 +289,17 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location, hi20 = (offset + 0x800) & 0xfffff000; lo12 = (offset - hi20) & 0xfff; - *location = (*location & 0xfff) | hi20; - *(location + 1) = (*(location + 1) & 0xfffff) | (lo12 << 20); - return 0; + riscv_insn_rmw(location, 0xfff, hi20); + return riscv_insn_rmw(location + 4, 0xfffff, lo12 << 20); } -static int apply_r_riscv_relax_rela(struct module *me, u32 *location, +static int apply_r_riscv_relax_rela(struct module *me, void *location, Elf_Addr v) { return 0; } -static int apply_r_riscv_align_rela(struct module *me, u32 *location, +static int apply_r_riscv_align_rela(struct module *me, void *location, Elf_Addr v) { pr_err( @@ -268,91 +308,446 @@ static int apply_r_riscv_align_rela(struct module *me, u32 *location, return -EINVAL; } -static int apply_r_riscv_add16_rela(struct module *me, u32 *location, +static int apply_r_riscv_add8_rela(struct module *me, void *location, Elf_Addr v) +{ + *(u8 *)location += (u8)v; + return 0; +} + +static int apply_r_riscv_add16_rela(struct module *me, void *location, Elf_Addr v) { *(u16 *)location += (u16)v; return 0; } -static int apply_r_riscv_add32_rela(struct module *me, u32 *location, +static int apply_r_riscv_add32_rela(struct module *me, void *location, Elf_Addr v) { *(u32 *)location += (u32)v; return 0; } -static int apply_r_riscv_add64_rela(struct module *me, u32 *location, +static int apply_r_riscv_add64_rela(struct module *me, void *location, Elf_Addr v) { *(u64 *)location += (u64)v; return 0; } -static int apply_r_riscv_sub16_rela(struct module *me, u32 *location, +static int apply_r_riscv_sub8_rela(struct module *me, void *location, Elf_Addr v) +{ + *(u8 *)location -= (u8)v; + return 0; +} + +static int apply_r_riscv_sub16_rela(struct module *me, void *location, Elf_Addr v) { *(u16 *)location -= (u16)v; return 0; } -static int apply_r_riscv_sub32_rela(struct module *me, u32 *location, +static int apply_r_riscv_sub32_rela(struct module *me, void *location, Elf_Addr v) { *(u32 *)location -= (u32)v; return 0; } -static int apply_r_riscv_sub64_rela(struct module *me, u32 *location, +static int apply_r_riscv_sub64_rela(struct module *me, void *location, Elf_Addr v) { *(u64 *)location -= (u64)v; return 0; } -static int (*reloc_handlers_rela[]) (struct module *me, u32 *location, - Elf_Addr v) = { - [R_RISCV_32] = 
apply_r_riscv_32_rela, - [R_RISCV_64] = apply_r_riscv_64_rela, - [R_RISCV_BRANCH] = apply_r_riscv_branch_rela, - [R_RISCV_JAL] = apply_r_riscv_jal_rela, - [R_RISCV_RVC_BRANCH] = apply_r_riscv_rvc_branch_rela, - [R_RISCV_RVC_JUMP] = apply_r_riscv_rvc_jump_rela, - [R_RISCV_PCREL_HI20] = apply_r_riscv_pcrel_hi20_rela, - [R_RISCV_PCREL_LO12_I] = apply_r_riscv_pcrel_lo12_i_rela, - [R_RISCV_PCREL_LO12_S] = apply_r_riscv_pcrel_lo12_s_rela, - [R_RISCV_HI20] = apply_r_riscv_hi20_rela, - [R_RISCV_LO12_I] = apply_r_riscv_lo12_i_rela, - [R_RISCV_LO12_S] = apply_r_riscv_lo12_s_rela, - [R_RISCV_GOT_HI20] = apply_r_riscv_got_hi20_rela, - [R_RISCV_CALL_PLT] = apply_r_riscv_call_plt_rela, - [R_RISCV_CALL] = apply_r_riscv_call_rela, - [R_RISCV_RELAX] = apply_r_riscv_relax_rela, - [R_RISCV_ALIGN] = apply_r_riscv_align_rela, - [R_RISCV_ADD16] = apply_r_riscv_add16_rela, - [R_RISCV_ADD32] = apply_r_riscv_add32_rela, - [R_RISCV_ADD64] = apply_r_riscv_add64_rela, - [R_RISCV_SUB16] = apply_r_riscv_sub16_rela, - [R_RISCV_SUB32] = apply_r_riscv_sub32_rela, - [R_RISCV_SUB64] = apply_r_riscv_sub64_rela, +static int dynamic_linking_not_supported(struct module *me, void *location, + Elf_Addr v) +{ + pr_err("%s: Dynamic linking not supported in kernel modules PC = %p\n", + me->name, location); + return -EINVAL; +} + +static int tls_not_supported(struct module *me, void *location, Elf_Addr v) +{ + pr_err("%s: Thread local storage not supported in kernel modules PC = %p\n", + me->name, location); + return -EINVAL; +} + +static int apply_r_riscv_sub6_rela(struct module *me, void *location, Elf_Addr v) +{ + u8 *byte = location; + u8 value = v; + + *byte = (*byte - (value & 0x3f)) & 0x3f; + return 0; +} + +static int apply_r_riscv_set6_rela(struct module *me, void *location, Elf_Addr v) +{ + u8 *byte = location; + u8 value = v; + + *byte = (*byte & 0xc0) | (value & 0x3f); + return 0; +} + +static int apply_r_riscv_set8_rela(struct module *me, void *location, Elf_Addr v) +{ + *(u8 *)location = (u8)v; + return 0; +} + +static int apply_r_riscv_set16_rela(struct module *me, void *location, + Elf_Addr v) +{ + *(u16 *)location = (u16)v; + return 0; +} + +static int apply_r_riscv_set32_rela(struct module *me, void *location, + Elf_Addr v) +{ + *(u32 *)location = (u32)v; + return 0; +} + +static int apply_r_riscv_32_pcrel_rela(struct module *me, void *location, + Elf_Addr v) +{ + *(u32 *)location = v - (uintptr_t)location; + return 0; +} + +static int apply_r_riscv_plt32_rela(struct module *me, void *location, + Elf_Addr v) +{ + ptrdiff_t offset = (void *)v - location; + + if (!riscv_insn_valid_32bit_offset(offset)) { + /* Only emit the plt entry if offset over 32-bit range */ + if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { + offset = (void *)module_emit_plt_entry(me, v) - location; + } else { + pr_err("%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", + me->name, (long long)v, location); + return -EINVAL; + } + } + + *(u32 *)location = (u32)offset; + return 0; +} + +static int apply_r_riscv_set_uleb128(struct module *me, void *location, Elf_Addr v) +{ + *(long *)location = v; + return 0; +} + +static int apply_r_riscv_sub_uleb128(struct module *me, void *location, Elf_Addr v) +{ + *(long *)location -= v; + return 0; +} + +static int apply_6_bit_accumulation(struct module *me, void *location, long buffer) +{ + u8 *byte = location; + u8 value = buffer; + + if (buffer > 0x3f) { + pr_err("%s: value %ld out of range for 6-bit relocation.\n", + me->name, buffer); + return -EINVAL; + } + + *byte = (*byte & 0xc0) | 
(value & 0x3f); + return 0; +} + +static int apply_8_bit_accumulation(struct module *me, void *location, long buffer) +{ + if (buffer > U8_MAX) { + pr_err("%s: value %ld out of range for 8-bit relocation.\n", + me->name, buffer); + return -EINVAL; + } + *(u8 *)location = (u8)buffer; + return 0; +} + +static int apply_16_bit_accumulation(struct module *me, void *location, long buffer) +{ + if (buffer > U16_MAX) { + pr_err("%s: value %ld out of range for 16-bit relocation.\n", + me->name, buffer); + return -EINVAL; + } + *(u16 *)location = (u16)buffer; + return 0; +} + +static int apply_32_bit_accumulation(struct module *me, void *location, long buffer) +{ + if (buffer > U32_MAX) { + pr_err("%s: value %ld out of range for 32-bit relocation.\n", + me->name, buffer); + return -EINVAL; + } + *(u32 *)location = (u32)buffer; + return 0; +} + +static int apply_64_bit_accumulation(struct module *me, void *location, long buffer) +{ + *(u64 *)location = (u64)buffer; + return 0; +} + +static int apply_uleb128_accumulation(struct module *me, void *location, long buffer) +{ + /* + * ULEB128 is a variable length encoding. Encode the buffer into + * the ULEB128 data format. + */ + u8 *p = location; + + while (buffer != 0) { + u8 value = buffer & 0x7f; + + buffer >>= 7; + value |= (!!buffer) << 7; + + *p++ = value; + } + return 0; +} + +/* + * Relocations defined in the riscv-elf-psabi-doc. + * This handles static linking only. + */ +static const struct relocation_handlers reloc_handlers[] = { + [R_RISCV_32] = { .reloc_handler = apply_r_riscv_32_rela }, + [R_RISCV_64] = { .reloc_handler = apply_r_riscv_64_rela }, + [R_RISCV_RELATIVE] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_COPY] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_JUMP_SLOT] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_DTPMOD32] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_DTPMOD64] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_DTPREL32] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_DTPREL64] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_TPREL32] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_TPREL64] = { .reloc_handler = dynamic_linking_not_supported }, + /* 12-15 undefined */ + [R_RISCV_BRANCH] = { .reloc_handler = apply_r_riscv_branch_rela }, + [R_RISCV_JAL] = { .reloc_handler = apply_r_riscv_jal_rela }, + [R_RISCV_CALL] = { .reloc_handler = apply_r_riscv_call_rela }, + [R_RISCV_CALL_PLT] = { .reloc_handler = apply_r_riscv_call_plt_rela }, + [R_RISCV_GOT_HI20] = { .reloc_handler = apply_r_riscv_got_hi20_rela }, + [R_RISCV_TLS_GOT_HI20] = { .reloc_handler = tls_not_supported }, + [R_RISCV_TLS_GD_HI20] = { .reloc_handler = tls_not_supported }, + [R_RISCV_PCREL_HI20] = { .reloc_handler = apply_r_riscv_pcrel_hi20_rela }, + [R_RISCV_PCREL_LO12_I] = { .reloc_handler = apply_r_riscv_pcrel_lo12_i_rela }, + [R_RISCV_PCREL_LO12_S] = { .reloc_handler = apply_r_riscv_pcrel_lo12_s_rela }, + [R_RISCV_HI20] = { .reloc_handler = apply_r_riscv_hi20_rela }, + [R_RISCV_LO12_I] = { .reloc_handler = apply_r_riscv_lo12_i_rela }, + [R_RISCV_LO12_S] = { .reloc_handler = apply_r_riscv_lo12_s_rela }, + [R_RISCV_TPREL_HI20] = { .reloc_handler = tls_not_supported }, + [R_RISCV_TPREL_LO12_I] = { .reloc_handler = tls_not_supported }, + [R_RISCV_TPREL_LO12_S] = { .reloc_handler = tls_not_supported }, + [R_RISCV_TPREL_ADD] = { .reloc_handler = tls_not_supported }, + [R_RISCV_ADD8] = { 
.reloc_handler = apply_r_riscv_add8_rela, + .accumulate_handler = apply_8_bit_accumulation }, + [R_RISCV_ADD16] = { .reloc_handler = apply_r_riscv_add16_rela, + .accumulate_handler = apply_16_bit_accumulation }, + [R_RISCV_ADD32] = { .reloc_handler = apply_r_riscv_add32_rela, + .accumulate_handler = apply_32_bit_accumulation }, + [R_RISCV_ADD64] = { .reloc_handler = apply_r_riscv_add64_rela, + .accumulate_handler = apply_64_bit_accumulation }, + [R_RISCV_SUB8] = { .reloc_handler = apply_r_riscv_sub8_rela, + .accumulate_handler = apply_8_bit_accumulation }, + [R_RISCV_SUB16] = { .reloc_handler = apply_r_riscv_sub16_rela, + .accumulate_handler = apply_16_bit_accumulation }, + [R_RISCV_SUB32] = { .reloc_handler = apply_r_riscv_sub32_rela, + .accumulate_handler = apply_32_bit_accumulation }, + [R_RISCV_SUB64] = { .reloc_handler = apply_r_riscv_sub64_rela, + .accumulate_handler = apply_64_bit_accumulation }, + /* 41-42 reserved for future standard use */ + [R_RISCV_ALIGN] = { .reloc_handler = apply_r_riscv_align_rela }, + [R_RISCV_RVC_BRANCH] = { .reloc_handler = apply_r_riscv_rvc_branch_rela }, + [R_RISCV_RVC_JUMP] = { .reloc_handler = apply_r_riscv_rvc_jump_rela }, + /* 46-50 reserved for future standard use */ + [R_RISCV_RELAX] = { .reloc_handler = apply_r_riscv_relax_rela }, + [R_RISCV_SUB6] = { .reloc_handler = apply_r_riscv_sub6_rela, + .accumulate_handler = apply_6_bit_accumulation }, + [R_RISCV_SET6] = { .reloc_handler = apply_r_riscv_set6_rela, + .accumulate_handler = apply_6_bit_accumulation }, + [R_RISCV_SET8] = { .reloc_handler = apply_r_riscv_set8_rela, + .accumulate_handler = apply_8_bit_accumulation }, + [R_RISCV_SET16] = { .reloc_handler = apply_r_riscv_set16_rela, + .accumulate_handler = apply_16_bit_accumulation }, + [R_RISCV_SET32] = { .reloc_handler = apply_r_riscv_set32_rela, + .accumulate_handler = apply_32_bit_accumulation }, + [R_RISCV_32_PCREL] = { .reloc_handler = apply_r_riscv_32_pcrel_rela }, + [R_RISCV_IRELATIVE] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_PLT32] = { .reloc_handler = apply_r_riscv_plt32_rela }, + [R_RISCV_SET_ULEB128] = { .reloc_handler = apply_r_riscv_set_uleb128, + .accumulate_handler = apply_uleb128_accumulation }, + [R_RISCV_SUB_ULEB128] = { .reloc_handler = apply_r_riscv_sub_uleb128, + .accumulate_handler = apply_uleb128_accumulation }, + /* 62-191 reserved for future standard use */ + /* 192-255 nonstandard ABI extensions */ }; +void process_accumulated_relocations(struct module *me) +{ + /* + * Only ADD/SUB/SET/ULEB128 should end up here. + * + * Each bucket may have more than one relocation location. All + * relocations for a location are stored in a list in a bucket. + * + * Relocations are applied to a temp variable before being stored to the + * provided location to check for overflow. This also allows ULEB128 to + * properly decide how many entries are needed before storing to + * location. The final value is stored into location using the handler + * for the last relocation to an address. 
+ * + * Three layers of indexing: + * - Each of the buckets in use + * - Groups of relocations in each bucket by location address + * - Each relocation entry for a location address + */ + struct used_bucket *bucket_iter; + struct relocation_head *rel_head_iter; + struct relocation_entry *rel_entry_iter; + int curr_type; + void *location; + long buffer; + + list_for_each_entry(bucket_iter, &used_buckets_list, head) { + hlist_for_each_entry(rel_head_iter, bucket_iter->bucket, node) { + buffer = 0; + location = rel_head_iter->location; + list_for_each_entry(rel_entry_iter, + rel_head_iter->rel_entry, head) { + curr_type = rel_entry_iter->type; + reloc_handlers[curr_type].reloc_handler( + me, &buffer, rel_entry_iter->value); + kfree(rel_entry_iter); + } + reloc_handlers[curr_type].accumulate_handler( + me, location, buffer); + kfree(rel_head_iter); + } + kfree(bucket_iter); + } + + kfree(relocation_hashtable); +} + +int add_relocation_to_accumulate(struct module *me, int type, void *location, + unsigned int hashtable_bits, Elf_Addr v) +{ + struct relocation_entry *entry; + struct relocation_head *rel_head; + struct hlist_head *current_head; + struct used_bucket *bucket; + unsigned long hash; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + INIT_LIST_HEAD(&entry->head); + entry->type = type; + entry->value = v; + + hash = hash_min((uintptr_t)location, hashtable_bits); + + current_head = &relocation_hashtable[hash]; + + /* Find matching location (if any) */ + bool found = false; + struct relocation_head *rel_head_iter; + + hlist_for_each_entry(rel_head_iter, current_head, node) { + if (rel_head_iter->location == location) { + found = true; + rel_head = rel_head_iter; + break; + } + } + + if (!found) { + rel_head = kmalloc(sizeof(*rel_head), GFP_KERNEL); + rel_head->rel_entry = + kmalloc(sizeof(struct list_head), GFP_KERNEL); + INIT_LIST_HEAD(rel_head->rel_entry); + rel_head->location = location; + INIT_HLIST_NODE(&rel_head->node); + if (!current_head->first) { + bucket = + kmalloc(sizeof(struct used_bucket), GFP_KERNEL); + INIT_LIST_HEAD(&bucket->head); + bucket->bucket = current_head; + list_add(&bucket->head, &used_buckets_list); + } + hlist_add_head(&rel_head->node, current_head); + } + + /* Add relocation to head of discovered rel_head */ + list_add_tail(&entry->head, rel_head->rel_entry); + + return 0; +} + +unsigned int initialize_relocation_hashtable(unsigned int num_relocations) +{ + /* Can safely assume that bits is not greater than sizeof(long) */ + unsigned long hashtable_size = roundup_pow_of_two(num_relocations); + unsigned int hashtable_bits = ilog2(hashtable_size); + + /* + * Double size of hashtable if num_relocations * 1.25 is greater than + * hashtable_size. 
+ */ + int should_double_size = ((num_relocations + (num_relocations >> 2)) > (hashtable_size)); + + hashtable_bits += should_double_size; + + hashtable_size <<= should_double_size; + + relocation_hashtable = kmalloc_array(hashtable_size, + sizeof(*relocation_hashtable), + GFP_KERNEL); + __hash_init(relocation_hashtable, hashtable_size); + + INIT_LIST_HEAD(&used_buckets_list); + + return hashtable_bits; +} + int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, unsigned int symindex, unsigned int relsec, struct module *me) { Elf_Rela *rel = (void *) sechdrs[relsec].sh_addr; - int (*handler)(struct module *me, u32 *location, Elf_Addr v); + int (*handler)(struct module *me, void *location, Elf_Addr v); Elf_Sym *sym; - u32 *location; + void *location; unsigned int i, type; Elf_Addr v; int res; + unsigned int num_relocations = sechdrs[relsec].sh_size / sizeof(*rel); + unsigned int hashtable_bits = initialize_relocation_hashtable(num_relocations); pr_debug("Applying relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); - for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + for (i = 0; i < num_relocations; i++) { /* This is where to make the change */ location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + rel[i].r_offset; @@ -370,8 +765,8 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, type = ELF_RISCV_R_TYPE(rel[i].r_info); - if (type < ARRAY_SIZE(reloc_handlers_rela)) - handler = reloc_handlers_rela[type]; + if (type < ARRAY_SIZE(reloc_handlers)) + handler = reloc_handlers[type].reloc_handler; else handler = NULL; @@ -427,11 +822,16 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, } } - res = handler(me, location, v); + if (reloc_handlers[type].accumulate_handler) + res = add_relocation_to_accumulate(me, type, location, hashtable_bits, v); + else + res = handler(me, location, v); if (res) return res; } + process_accumulated_relocations(me); + return 0; } diff --git a/arch/riscv/kernel/probes/rethook_trampoline.S b/arch/riscv/kernel/probes/rethook_trampoline.S index 21bac92a170a..f2cd83d9b0f0 100644 --- a/arch/riscv/kernel/probes/rethook_trampoline.S +++ b/arch/riscv/kernel/probes/rethook_trampoline.S @@ -75,7 +75,7 @@ REG_L x31, PT_T6(sp) .endm -ENTRY(arch_rethook_trampoline) +SYM_CODE_START(arch_rethook_trampoline) addi sp, sp, -(PT_SIZE_ON_STACK) save_all_base_regs @@ -90,4 +90,4 @@ ENTRY(arch_rethook_trampoline) addi sp, sp, PT_SIZE_ON_STACK ret -ENDPROC(arch_rethook_trampoline) +SYM_CODE_END(arch_rethook_trampoline) diff --git a/arch/riscv/kernel/probes/simulate-insn.c b/arch/riscv/kernel/probes/simulate-insn.c index d3099d67816d..6c166029079c 100644 --- a/arch/riscv/kernel/probes/simulate-insn.c +++ b/arch/riscv/kernel/probes/simulate-insn.c @@ -24,7 +24,7 @@ static inline bool rv_insn_reg_set_val(struct pt_regs *regs, u32 index, unsigned long val) { if (index == 0) - return false; + return true; else if (index <= 31) *((unsigned long *)regs + index) = val; else diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c index 194f166b2cc4..4b3dc8beaf77 100644 --- a/arch/riscv/kernel/probes/uprobes.c +++ b/arch/riscv/kernel/probes/uprobes.c @@ -3,6 +3,7 @@ #include <linux/highmem.h> #include <linux/ptrace.h> #include <linux/uprobes.h> +#include <asm/insn.h> #include "decode-insn.h" @@ -17,6 +18,11 @@ bool is_swbp_insn(uprobe_opcode_t *insn) #endif } +bool is_trap_insn(uprobe_opcode_t *insn) +{ + return riscv_insn_is_ebreak(*insn) || riscv_insn_is_c_ebreak(*insn); +} + unsigned long 
uprobe_get_swbp_addr(struct pt_regs *regs) { return instruction_pointer(regs); diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index e32d737e039f..4f21d970a129 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -25,6 +25,7 @@ #include <asm/thread_info.h> #include <asm/cpuidle.h> #include <asm/vector.h> +#include <asm/cpufeature.h> register unsigned long gp_in_global __asm__("gp"); @@ -41,6 +42,23 @@ void arch_cpu_idle(void) cpu_do_idle(); } +int set_unalign_ctl(struct task_struct *tsk, unsigned int val) +{ + if (!unaligned_ctl_available()) + return -EINVAL; + + tsk->thread.align_ctl = val; + return 0; +} + +int get_unalign_ctl(struct task_struct *tsk, unsigned long adr) +{ + if (!unaligned_ctl_available()) + return -EINVAL; + + return put_user(tsk->thread.align_ctl, (unsigned long __user *)adr); +} + void __show_regs(struct pt_regs *regs) { show_regs_print_info(KERN_DEFAULT); diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index c672c8ba9a2a..5a62ed1da453 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -11,6 +11,7 @@ #include <linux/reboot.h> #include <asm/sbi.h> #include <asm/smp.h> +#include <asm/tlbflush.h> /* default SBI version is 0.1 */ unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT; @@ -377,31 +378,14 @@ int sbi_remote_fence_i(const struct cpumask *cpu_mask) EXPORT_SYMBOL(sbi_remote_fence_i); /** - * sbi_remote_sfence_vma() - Execute SFENCE.VMA instructions on given remote - * harts for the specified virtual address range. - * @cpu_mask: A cpu mask containing all the target harts. - * @start: Start of the virtual address - * @size: Total size of the virtual address range. - * - * Return: 0 on success, appropriate linux error code otherwise. - */ -int sbi_remote_sfence_vma(const struct cpumask *cpu_mask, - unsigned long start, - unsigned long size) -{ - return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, - cpu_mask, start, size, 0, 0); -} -EXPORT_SYMBOL(sbi_remote_sfence_vma); - -/** * sbi_remote_sfence_vma_asid() - Execute SFENCE.VMA instructions on given - * remote harts for a virtual address range belonging to a specific ASID. + * remote harts for a virtual address range belonging to a specific ASID or not. * * @cpu_mask: A cpu mask containing all the target harts. * @start: Start of the virtual address * @size: Total size of the virtual address range. - * @asid: The value of address space identifier (ASID). + * @asid: The value of address space identifier (ASID), or FLUSH_TLB_NO_ASID + * for flushing all address spaces. * * Return: 0 on success, appropriate linux error code otherwise. 
*/ @@ -410,8 +394,12 @@ int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask, unsigned long size, unsigned long asid) { - return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID, - cpu_mask, start, size, asid, 0); + if (asid == FLUSH_TLB_NO_ASID) + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, + cpu_mask, start, size, 0, 0); + else + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID, + cpu_mask, start, size, asid, 0); } EXPORT_SYMBOL(sbi_remote_sfence_vma_asid); diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 0624f44d43ec..535a837de55d 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -25,6 +25,7 @@ #include <asm/acpi.h> #include <asm/alternative.h> #include <asm/cacheflush.h> +#include <asm/cpufeature.h> #include <asm/cpu_ops.h> #include <asm/early_ioremap.h> #include <asm/pgtable.h> @@ -289,10 +290,13 @@ void __init setup_arch(char **cmdline_p) riscv_fill_hwcap(); init_rt_signal_env(); apply_boot_alternatives(); + if (IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM) && riscv_isa_extension_available(NULL, ZICBOM)) riscv_noncoherent_supported(); riscv_set_dma_cache_alignment(); + + riscv_user_isa_enable(); } static int __init topology_init(void) diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 21a4d0e111bc..88b6220b2608 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -384,30 +384,6 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) sigset_t *oldset = sigmask_to_save(); int ret; - /* Are we from a system call? */ - if (regs->cause == EXC_SYSCALL) { - /* Avoid additional syscall restarting via ret_from_exception */ - regs->cause = -1UL; - /* If so, check system call restarting.. */ - switch (regs->a0) { - case -ERESTART_RESTARTBLOCK: - case -ERESTARTNOHAND: - regs->a0 = -EINTR; - break; - - case -ERESTARTSYS: - if (!(ksig->ka.sa.sa_flags & SA_RESTART)) { - regs->a0 = -EINTR; - break; - } - fallthrough; - case -ERESTARTNOINTR: - regs->a0 = regs->orig_a0; - regs->epc -= 0x4; - break; - } - } - rseq_signal_deliver(ksig, regs); /* Set up the stack frame */ @@ -421,36 +397,67 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) void arch_do_signal_or_restart(struct pt_regs *regs) { + unsigned long continue_addr = 0, restart_addr = 0; + int retval = 0; struct ksignal ksig; + bool syscall = (regs->cause == EXC_SYSCALL); - if (get_signal(&ksig)) { - /* Actually deliver the signal */ - handle_signal(&ksig, regs); - return; - } + /* If we were from a system call, check for system call restarting */ + if (syscall) { + continue_addr = regs->epc; + restart_addr = continue_addr - 4; + retval = regs->a0; - /* Did we come from a system call? */ - if (regs->cause == EXC_SYSCALL) { /* Avoid additional syscall restarting via ret_from_exception */ regs->cause = -1UL; - /* Restart the system call - no handlers present */ - switch (regs->a0) { + /* + * Prepare for system call restart. We do this here so that a + * debugger will see the already changed PC. + */ + switch (retval) { case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: - regs->a0 = regs->orig_a0; - regs->epc -= 0x4; - break; case -ERESTART_RESTARTBLOCK: - regs->a0 = regs->orig_a0; - regs->a7 = __NR_restart_syscall; - regs->epc -= 0x4; + regs->a0 = regs->orig_a0; + regs->epc = restart_addr; break; } } /* + * Get the signal to deliver. When running under ptrace, at this point + * the debugger may change all of our registers. 
+ */ + if (get_signal(&ksig)) { + /* + * Depending on the signal settings, we may need to revert the + * decision to restart the system call, but skip this if a + * debugger has chosen to restart at a different PC. + */ + if (regs->epc == restart_addr && + (retval == -ERESTARTNOHAND || + retval == -ERESTART_RESTARTBLOCK || + (retval == -ERESTARTSYS && + !(ksig.ka.sa.sa_flags & SA_RESTART)))) { + regs->a0 = -EINTR; + regs->epc = continue_addr; + } + + /* Actually deliver the signal */ + handle_signal(&ksig, regs); + return; + } + + /* + * Handle restarting a different system call. As above, if a debugger + * has chosen to restart at a different PC, ignore the restart. + */ + if (syscall && regs->epc == restart_addr && retval == -ERESTART_RESTARTBLOCK) + regs->a7 = __NR_restart_syscall; + + /* * If there is no signal to deliver, we just put the saved * sigmask back. */ diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 1b8da4e40a4d..d162bf339beb 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -25,6 +25,8 @@ #include <linux/of.h> #include <linux/sched/task_stack.h> #include <linux/sched/mm.h> + +#include <asm/cpufeature.h> #include <asm/cpu_ops.h> #include <asm/cpufeature.h> #include <asm/irq.h> @@ -246,13 +248,14 @@ asmlinkage __visible void smp_callin(void) numa_add_cpu(curr_cpuid); set_cpu_online(curr_cpuid, 1); - check_unaligned_access(curr_cpuid); if (has_vector()) { if (riscv_v_setup_vsize()) elf_hwcap &= ~COMPAT_HWCAP_ISA_V; } + riscv_user_isa_enable(); + /* * Remote TLB flushes are ignored while the CPU is offline, so emit * a local TLB flush right now just in case. diff --git a/arch/riscv/kernel/suspend_entry.S b/arch/riscv/kernel/suspend_entry.S index f7960c7c5f9e..2d54f309c140 100644 --- a/arch/riscv/kernel/suspend_entry.S +++ b/arch/riscv/kernel/suspend_entry.S @@ -16,7 +16,7 @@ .altmacro .option norelax -ENTRY(__cpu_suspend_enter) +SYM_FUNC_START(__cpu_suspend_enter) /* Save registers (except A0 and T0-T6) */ REG_S ra, (SUSPEND_CONTEXT_REGS + PT_RA)(a0) REG_S sp, (SUSPEND_CONTEXT_REGS + PT_SP)(a0) @@ -57,14 +57,11 @@ ENTRY(__cpu_suspend_enter) /* Return to C code */ ret -END(__cpu_suspend_enter) +SYM_FUNC_END(__cpu_suspend_enter) SYM_TYPED_FUNC_START(__cpu_resume_enter) /* Load the global pointer */ - .option push - .option norelax - la gp, __global_pointer$ - .option pop + load_global_pointer #ifdef CONFIG_MMU /* Save A0 and A1 */ diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index b651ec698a91..c712037dbe10 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -145,26 +145,38 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, for_each_cpu(cpu, cpus) { struct riscv_isainfo *isainfo = &hart_isa[cpu]; - if (riscv_isa_extension_available(isainfo->isa, ZBA)) - pair->value |= RISCV_HWPROBE_EXT_ZBA; - else - missing |= RISCV_HWPROBE_EXT_ZBA; - - if (riscv_isa_extension_available(isainfo->isa, ZBB)) - pair->value |= RISCV_HWPROBE_EXT_ZBB; - else - missing |= RISCV_HWPROBE_EXT_ZBB; - - if (riscv_isa_extension_available(isainfo->isa, ZBS)) - pair->value |= RISCV_HWPROBE_EXT_ZBS; - else - missing |= RISCV_HWPROBE_EXT_ZBS; +#define EXT_KEY(ext) \ + do { \ + if (__riscv_isa_extension_available(isainfo->isa, RISCV_ISA_EXT_##ext)) \ + pair->value |= RISCV_HWPROBE_EXT_##ext; \ + else \ + missing |= RISCV_HWPROBE_EXT_##ext; \ + } while (false) + + /* + * Only use EXT_KEY() for extensions which can be exposed to userspace, + * regardless of the kernel's configuration, as no 
other checks, besides + * presence in the hart_isa bitmap, are made. + */ + EXT_KEY(ZBA); + EXT_KEY(ZBB); + EXT_KEY(ZBS); + EXT_KEY(ZICBOZ); +#undef EXT_KEY } /* Now turn off reporting features if any CPU is missing it. */ pair->value &= ~missing; } +static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext) +{ + struct riscv_hwprobe pair; + + hwprobe_isa_ext0(&pair, cpus); + return (pair.value & ext); +} + static u64 hwprobe_misaligned(const struct cpumask *cpus) { int cpu; @@ -215,6 +227,12 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, pair->value = hwprobe_misaligned(cpus); break; + case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE: + pair->value = 0; + if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ)) + pair->value = riscv_cboz_block_size; + break; + /* * For forward compatibility, unknown keys don't fail the whole * call, but get their element key set to -1 and value set to 0 diff --git a/arch/riscv/kernel/tests/Kconfig.debug b/arch/riscv/kernel/tests/Kconfig.debug new file mode 100644 index 000000000000..5dba64e8e977 --- /dev/null +++ b/arch/riscv/kernel/tests/Kconfig.debug @@ -0,0 +1,35 @@ +# SPDX-License-Identifier: GPL-2.0-only +menu "arch/riscv/kernel Testing and Coverage" + +config AS_HAS_ULEB128 + def_bool $(as-instr,.reloc label$(comma) R_RISCV_SET_ULEB128$(comma) 127\n.reloc label$(comma) R_RISCV_SUB_ULEB128$(comma) 127\nlabel:\n.word 0) + +menuconfig RUNTIME_KERNEL_TESTING_MENU + bool "arch/riscv/kernel runtime Testing" + def_bool y + help + Enable riscv kernel runtime testing. + +if RUNTIME_KERNEL_TESTING_MENU + +config RISCV_MODULE_LINKING_KUNIT + bool "KUnit test riscv module linking at runtime" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + Enable this option to test riscv module linking at boot. This will + enable a module called "test_module_linking". + + KUnit tests run during boot and output the results to the debug log + in TAP format (http://testanything.org/). Only useful for kernel devs + running the KUnit test harness, and not intended for inclusion into a + production build. + + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. 
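Stepping back to the sys_riscv.c hunk above: the new RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE key is queried from userspace like any other hwprobe pair. A minimal sketch of such a query, assuming uapi headers from a kernel with this series and using the raw syscall since a libc wrapper may not exist:

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <asm/unistd.h>		/* __NR_riscv_hwprobe */
	#include <asm/hwprobe.h>	/* struct riscv_hwprobe, RISCV_HWPROBE_* */

	int main(void)
	{
		struct riscv_hwprobe pair = {
			.key = RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE,
		};

		/* cpusetsize == 0 and cpus == NULL ask about all online harts */
		if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0) != 0) {
			perror("riscv_hwprobe");
			return 1;
		}

		if (pair.key < 0)
			printf("Zicboz block size not reported\n");
		else
			printf("Zicboz block size: %llu bytes\n",
			       (unsigned long long)pair.value);
		return 0;
	}

Because unknown keys do not fail the whole call but come back with key set to -1 and value set to 0, the same binary keeps working on kernels that predate this key.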
+ +endif # RUNTIME_KERNEL_TESTING_MENU + +endmenu # "arch/riscv/kernel Testing and Coverage" diff --git a/arch/riscv/kernel/tests/Makefile b/arch/riscv/kernel/tests/Makefile new file mode 100644 index 000000000000..7d6c76cffe20 --- /dev/null +++ b/arch/riscv/kernel/tests/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_RISCV_MODULE_LINKING_KUNIT) += module_test/ diff --git a/arch/riscv/kernel/tests/module_test/Makefile b/arch/riscv/kernel/tests/module_test/Makefile new file mode 100644 index 000000000000..d7a6fd8943de --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/Makefile @@ -0,0 +1,15 @@ +obj-m += test_module_linking.o + +test_sub := test_sub6.o test_sub8.o test_sub16.o test_sub32.o test_sub64.o + +test_set := test_set6.o test_set8.o test_set16.o test_set32.o + +test_module_linking-objs += $(test_sub) + +test_module_linking-objs += $(test_set) + +ifeq ($(CONFIG_AS_HAS_ULEB128),y) +test_module_linking-objs += test_uleb128.o +endif + +test_module_linking-objs += test_module_linking_main.o diff --git a/arch/riscv/kernel/tests/module_test/test_module_linking_main.c b/arch/riscv/kernel/tests/module_test/test_module_linking_main.c new file mode 100644 index 000000000000..8df5fa5b834e --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_module_linking_main.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Rivos Inc. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <kunit/test.h> + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Test module linking"); + +extern int test_set32(void); +extern int test_set16(void); +extern int test_set8(void); +extern int test_set6(void); +extern long test_sub64(void); +extern int test_sub32(void); +extern int test_sub16(void); +extern int test_sub8(void); +extern int test_sub6(void); + +#ifdef CONFIG_AS_HAS_ULEB128 +extern int test_uleb_basic(void); +extern int test_uleb_large(void); +#endif + +#define CHECK_EQ(lhs, rhs) KUNIT_ASSERT_EQ(test, lhs, rhs) + +void run_test_set(struct kunit *test); +void run_test_sub(struct kunit *test); +void run_test_uleb(struct kunit *test); + +void run_test_set(struct kunit *test) +{ + int val32 = test_set32(); + int val16 = test_set16(); + int val8 = test_set8(); + int val6 = test_set6(); + + CHECK_EQ(val32, 0); + CHECK_EQ(val16, 0); + CHECK_EQ(val8, 0); + CHECK_EQ(val6, 0); +} + +void run_test_sub(struct kunit *test) +{ + int val64 = test_sub64(); + int val32 = test_sub32(); + int val16 = test_sub16(); + int val8 = test_sub8(); + int val6 = test_sub6(); + + CHECK_EQ(val64, 0); + CHECK_EQ(val32, 0); + CHECK_EQ(val16, 0); + CHECK_EQ(val8, 0); + CHECK_EQ(val6, 0); +} + +#ifdef CONFIG_AS_HAS_ULEB128 +void run_test_uleb(struct kunit *test) +{ + int val_uleb = test_uleb_basic(); + int val_uleb2 = test_uleb_large(); + + CHECK_EQ(val_uleb, 0); + CHECK_EQ(val_uleb2, 0); +} +#endif + +static struct kunit_case __refdata riscv_module_linking_test_cases[] = { + KUNIT_CASE(run_test_set), + KUNIT_CASE(run_test_sub), +#ifdef CONFIG_AS_HAS_ULEB128 + KUNIT_CASE(run_test_uleb), +#endif + {} +}; + +static struct kunit_suite riscv_module_linking_test_suite = { + .name = "riscv_module_linking", + .test_cases = riscv_module_linking_test_cases, +}; + +kunit_test_suites(&riscv_module_linking_test_suite); diff --git a/arch/riscv/kernel/tests/module_test/test_set16.S b/arch/riscv/kernel/tests/module_test/test_set16.S new file mode 100644 index 000000000000..2be0e441a12e --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_set16.S @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */
+/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_set16 +test_set16: + lw a0, set16 + la t0, set16 +#ifdef CONFIG_32BIT + slli t0, t0, 16 + srli t0, t0, 16 +#else + slli t0, t0, 48 + srli t0, t0, 48 +#endif + sub a0, a0, t0 + ret +.data +set16: + .reloc set16, R_RISCV_SET16, set16 + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_set32.S b/arch/riscv/kernel/tests/module_test/test_set32.S new file mode 100644 index 000000000000..de0444537e67 --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_set32.S @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_set32 +test_set32: + lw a0, set32 + la t0, set32 +#ifndef CONFIG_32BIT + slli t0, t0, 32 + srli t0, t0, 32 +#endif + sub a0, a0, t0 + ret +.data +set32: + .reloc set32, R_RISCV_SET32, set32 + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_set6.S b/arch/riscv/kernel/tests/module_test/test_set6.S new file mode 100644 index 000000000000..c39ce4c219eb --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_set6.S @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_set6 +test_set6: + lw a0, set6 + la t0, set6 +#ifdef CONFIG_32BIT + slli t0, t0, 26 + srli t0, t0, 26 +#else + slli t0, t0, 58 + srli t0, t0, 58 +#endif + sub a0, a0, t0 + ret +.data +set6: + .reloc set6, R_RISCV_SET6, set6 + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_set8.S b/arch/riscv/kernel/tests/module_test/test_set8.S new file mode 100644 index 000000000000..a656173f6f99 --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_set8.S @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_set8 +test_set8: + lw a0, set8 + la t0, set8 +#ifdef CONFIG_32BIT + slli t0, t0, 24 + srli t0, t0, 24 +#else + slli t0, t0, 56 + srli t0, t0, 56 +#endif + sub a0, a0, t0 + ret +.data +set8: + .reloc set8, R_RISCV_SET8, set8 + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_sub16.S b/arch/riscv/kernel/tests/module_test/test_sub16.S new file mode 100644 index 000000000000..80f731d599ba --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_sub16.S @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_sub16 +test_sub16: + lh a0, sub16 + addi a0, a0, -32 + ret +first: + .space 32 +second: + +.data +sub16: + .reloc sub16, R_RISCV_ADD16, second + .reloc sub16, R_RISCV_SUB16, first + .half 0 diff --git a/arch/riscv/kernel/tests/module_test/test_sub32.S b/arch/riscv/kernel/tests/module_test/test_sub32.S new file mode 100644 index 000000000000..a341686e12df --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_sub32.S @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_sub32 +test_sub32: + lw a0, sub32 + addi a0, a0, -32 + ret +first: + .space 32 +second: + +.data +sub32: + .reloc sub32, R_RISCV_ADD32, second + .reloc sub32, R_RISCV_SUB32, first + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_sub6.S b/arch/riscv/kernel/tests/module_test/test_sub6.S new file mode 100644 index 000000000000..e8b61c1ec527 --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_sub6.S @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. 
+ */ + +.text +.global test_sub6 +test_sub6: + lb a0, sub6 + addi a0, a0, -32 + ret +first: + .space 32 +second: + +.data +sub6: + .reloc sub6, R_RISCV_SET6, second + .reloc sub6, R_RISCV_SUB6, first + .byte 0 diff --git a/arch/riscv/kernel/tests/module_test/test_sub64.S b/arch/riscv/kernel/tests/module_test/test_sub64.S new file mode 100644 index 000000000000..a59e8afa88fd --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_sub64.S @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_sub64 +test_sub64: +#ifdef CONFIG_32BIT + lw a0, sub64 +#else + ld a0, sub64 +#endif + addi a0, a0, -32 + ret +first: + .space 32 +second: + +.data +sub64: + .reloc sub64, R_RISCV_ADD64, second + .reloc sub64, R_RISCV_SUB64, first + .word 0 + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_sub8.S b/arch/riscv/kernel/tests/module_test/test_sub8.S new file mode 100644 index 000000000000..ac5d0ec98de3 --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_sub8.S @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_sub8 +test_sub8: + lb a0, sub8 + addi a0, a0, -32 + ret +first: + .space 32 +second: + +.data +sub8: + .reloc sub8, R_RISCV_ADD8, second + .reloc sub8, R_RISCV_SUB8, first + .byte 0 diff --git a/arch/riscv/kernel/tests/module_test/test_uleb128.S b/arch/riscv/kernel/tests/module_test/test_uleb128.S new file mode 100644 index 000000000000..90f22049d553 --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_uleb128.S @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_uleb_basic +test_uleb_basic: + ld a0, second + addi a0, a0, -127 + ret + +.global test_uleb_large +test_uleb_large: + ld a0, fourth + addi a0, a0, -0x07e8 + ret + +.data +first: + .space 127 +second: + .reloc second, R_RISCV_SET_ULEB128, second + .reloc second, R_RISCV_SUB_ULEB128, first + .dword 0 +third: + .space 1000 +fourth: + .reloc fourth, R_RISCV_SET_ULEB128, fourth + .reloc fourth, R_RISCV_SUB_ULEB128, third + .dword 0 diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index fae8f610d867..a1b9be3c4332 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -36,7 +36,21 @@ int show_unhandled_signals = 1; static DEFINE_SPINLOCK(die_lock); -static void dump_kernel_instr(const char *loglvl, struct pt_regs *regs) +static int copy_code(struct pt_regs *regs, u16 *val, const u16 *insns) +{ + const void __user *uaddr = (__force const void __user *)insns; + + if (!user_mode(regs)) + return get_kernel_nofault(*val, insns); + + /* The user space code from other tasks cannot be accessed. */ + if (regs != task_pt_regs(current)) + return -EPERM; + + return copy_from_user_nofault(val, uaddr, sizeof(*val)); +} + +static void dump_instr(const char *loglvl, struct pt_regs *regs) { char str[sizeof("0000 ") * 12 + 2 + 1], *p = str; const u16 *insns = (u16 *)instruction_pointer(regs); @@ -45,7 +59,7 @@ static void dump_kernel_instr(const char *loglvl, struct pt_regs *regs) int i; for (i = -10; i < 2; i++) { - bad = get_kernel_nofault(val, &insns[i]); + bad = copy_code(regs, &val, &insns[i]); if (!bad) { p += sprintf(p, i == 0 ? "(%04hx) " : "%04hx ", val); } else { @@ -74,7 +88,7 @@ void die(struct pt_regs *regs, const char *str) print_modules(); if (regs) { show_regs(regs); - dump_kernel_instr(KERN_EMERG, regs); + dump_instr(KERN_EMERG, regs); } cause = regs ? 
regs->cause : -1; @@ -107,6 +121,7 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr) print_vma_addr(KERN_CONT " in ", instruction_pointer(regs)); pr_cont("\n"); __show_regs(regs); + dump_instr(KERN_EMERG, regs); } force_sig_fault(signo, code, (void __user *)addr); @@ -181,14 +196,6 @@ asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *re DO_ERROR_INFO(do_trap_load_fault, SIGSEGV, SEGV_ACCERR, "load access fault"); -#ifndef CONFIG_RISCV_M_MODE -DO_ERROR_INFO(do_trap_load_misaligned, - SIGBUS, BUS_ADRALN, "Oops - load address misaligned"); -DO_ERROR_INFO(do_trap_store_misaligned, - SIGBUS, BUS_ADRALN, "Oops - store (or AMO) address misaligned"); -#else -int handle_misaligned_load(struct pt_regs *regs); -int handle_misaligned_store(struct pt_regs *regs); asmlinkage __visible __trap_section void do_trap_load_misaligned(struct pt_regs *regs) { @@ -231,7 +238,6 @@ asmlinkage __visible __trap_section void do_trap_store_misaligned(struct pt_regs irqentry_nmi_exit(regs, state); } } -#endif DO_ERROR_INFO(do_trap_store_fault, SIGSEGV, SEGV_ACCERR, "store (or AMO) access fault"); DO_ERROR_INFO(do_trap_ecall_s, @@ -360,34 +366,10 @@ static void noinstr handle_riscv_irq(struct pt_regs *regs) asmlinkage void noinstr do_irq(struct pt_regs *regs) { irqentry_state_t state = irqentry_enter(regs); -#ifdef CONFIG_IRQ_STACKS - if (on_thread_stack()) { - ulong *sp = per_cpu(irq_stack_ptr, smp_processor_id()) - + IRQ_STACK_SIZE/sizeof(ulong); - __asm__ __volatile( - "addi sp, sp, -"RISCV_SZPTR "\n" - REG_S" ra, (sp) \n" - "addi sp, sp, -"RISCV_SZPTR "\n" - REG_S" s0, (sp) \n" - "addi s0, sp, 2*"RISCV_SZPTR "\n" - "move sp, %[sp] \n" - "move a0, %[regs] \n" - "call handle_riscv_irq \n" - "addi sp, s0, -2*"RISCV_SZPTR"\n" - REG_L" s0, (sp) \n" - "addi sp, sp, "RISCV_SZPTR "\n" - REG_L" ra, (sp) \n" - "addi sp, sp, "RISCV_SZPTR "\n" - : - : [sp] "r" (sp), [regs] "r" (regs) - : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", - "t0", "t1", "t2", "t3", "t4", "t5", "t6", -#ifndef CONFIG_FRAME_POINTER - "s0", -#endif - "memory"); - } else -#endif + + if (IS_ENABLED(CONFIG_IRQ_STACKS) && on_thread_stack()) + call_on_irq_stack(regs, handle_riscv_irq); + else handle_riscv_irq(regs); irqentry_exit(regs, state); @@ -410,48 +392,14 @@ int is_valid_bugaddr(unsigned long pc) #endif /* CONFIG_GENERIC_BUG */ #ifdef CONFIG_VMAP_STACK -/* - * Extra stack space that allows us to provide panic messages when the kernel - * has overflowed its stack. - */ -static DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], +DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)__aligned(16); -/* - * A temporary stack for use by handle_kernel_stack_overflow. This is used so - * we can call into C code to get the per-hart overflow stack. Usage of this - * stack must be protected by spin_shadow_stack. - */ -long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)] __aligned(16); - -/* - * A pseudo spinlock to protect the shadow stack from being used by multiple - * harts concurrently. This isn't a real spinlock because the lock side must - * be taken without a valid stack and only a single register, it's only taken - * while in the process of panicing anyway so the performance and error - * checking a proper spinlock gives us doesn't matter. 
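The do_irq() rewrite above replaces an open-coded inline-assembly stack switch with call_on_irq_stack(). A toy user-space model of the dispatch; the stand-in macro, the opaque pt_regs, and the no-op stack switch are all assumptions for illustration:

#include <stdbool.h>
#include <stdio.h>

#define IS_ENABLED(option) (option)     /* stand-in for the kconfig macro */
#define CONFIG_IRQ_STACKS 1             /* assumed configuration */

struct pt_regs;                         /* opaque in this sketch */

static void handle_irq(struct pt_regs *regs)
{
        (void)regs;
        printf("handling irq\n");
}

/* The real helper switches sp to the per-CPU IRQ stack in assembly
 * before making the call; this sketch only forwards it. */
static void call_on_irq_stack(struct pt_regs *regs,
                              void (*handler)(struct pt_regs *))
{
        handler(regs);
}

void do_irq_sketch(struct pt_regs *regs, bool on_thread_stack)
{
        if (IS_ENABLED(CONFIG_IRQ_STACKS) && on_thread_stack)
                call_on_irq_stack(regs, handle_irq);
        else
                handle_irq(regs);
}

Unlike the old #ifdef block, both branches are compiled and type-checked in every configuration; the disabled one is simply eliminated as dead code.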
- */ -unsigned long spin_shadow_stack; - -asmlinkage unsigned long get_overflow_stack(void) -{ - return (unsigned long)this_cpu_ptr(overflow_stack) + - OVERFLOW_STACK_SIZE; -} asmlinkage void handle_bad_stack(struct pt_regs *regs) { unsigned long tsk_stk = (unsigned long)current->stack; unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack); - /* - * We're done with the shadow stack by this point, as we're on the - * overflow stack. Tell any other concurrent overflowing harts that - * they can proceed with panicing by releasing the pseudo-spinlock. - * - * This pairs with an amoswap.aq in handle_kernel_stack_overflow. - */ - smp_store_release(&spin_shadow_stack, 0); - console_verbose(); pr_emerg("Insufficient stack space to handle exception!\n"); diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 378f5b151443..5eba37147caa 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -6,12 +6,16 @@ #include <linux/init.h> #include <linux/mm.h> #include <linux/module.h> +#include <linux/perf_event.h> #include <linux/irq.h> #include <linux/stringify.h> #include <asm/processor.h> #include <asm/ptrace.h> #include <asm/csr.h> +#include <asm/entry-common.h> +#include <asm/hwprobe.h> +#include <asm/cpufeature.h> #define INSN_MATCH_LB 0x3 #define INSN_MASK_LB 0x707f @@ -151,53 +155,134 @@ #define PRECISION_S 0 #define PRECISION_D 1 -#define DECLARE_UNPRIVILEGED_LOAD_FUNCTION(type, insn) \ -static inline type load_##type(const type *addr) \ -{ \ - type val; \ - asm (#insn " %0, %1" \ - : "=&r" (val) : "m" (*addr)); \ - return val; \ +#ifdef CONFIG_FPU + +#define FP_GET_RD(insn) (insn >> 7 & 0x1F) + +extern void put_f32_reg(unsigned long fp_reg, unsigned long value); + +static int set_f32_rd(unsigned long insn, struct pt_regs *regs, + unsigned long val) +{ + unsigned long fp_reg = FP_GET_RD(insn); + + put_f32_reg(fp_reg, val); + regs->status |= SR_FS_DIRTY; + + return 0; } -#define DECLARE_UNPRIVILEGED_STORE_FUNCTION(type, insn) \ -static inline void store_##type(type *addr, type val) \ -{ \ - asm volatile (#insn " %0, %1\n" \ - : : "r" (val), "m" (*addr)); \ +extern void put_f64_reg(unsigned long fp_reg, unsigned long value); + +static int set_f64_rd(unsigned long insn, struct pt_regs *regs, u64 val) +{ + unsigned long fp_reg = FP_GET_RD(insn); + unsigned long value; + +#if __riscv_xlen == 32 + value = (unsigned long) &val; +#else + value = val; +#endif + put_f64_reg(fp_reg, value); + regs->status |= SR_FS_DIRTY; + + return 0; } -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u8, lbu) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u16, lhu) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s8, lb) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s16, lh) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s32, lw) -DECLARE_UNPRIVILEGED_STORE_FUNCTION(u8, sb) -DECLARE_UNPRIVILEGED_STORE_FUNCTION(u16, sh) -DECLARE_UNPRIVILEGED_STORE_FUNCTION(u32, sw) -#if defined(CONFIG_64BIT) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u32, lwu) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u64, ld) -DECLARE_UNPRIVILEGED_STORE_FUNCTION(u64, sd) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(ulong, ld) +#if __riscv_xlen == 32 +extern void get_f64_reg(unsigned long fp_reg, u64 *value); + +static u64 get_f64_rs(unsigned long insn, u8 fp_reg_offset, + struct pt_regs *regs) +{ + unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F; + u64 val; + + get_f64_reg(fp_reg, &val); + regs->status |= SR_FS_DIRTY; + + return val; +} #else -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u32, lw) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(ulong, lw) 
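Every branch of the misaligned-access emulation keys off fixed bit fields of the trapped instruction. For reference, a standalone sketch of the field extraction that macros like FP_GET_RD() above perform, plus the length rule that get_insn() applies below; the free-standing form is an assumption, the bit positions are from the ISA:

#include <stdint.h>

/* Uncompressed RISC-V encoding: rd = bits 11:7, rs1 = bits 19:15,
 * rs2 = bits 24:20. */
static unsigned int insn_rd(uint32_t insn)  { return (insn >> 7)  & 0x1f; }
static unsigned int insn_rs1(uint32_t insn) { return (insn >> 15) & 0x1f; }
static unsigned int insn_rs2(uint32_t insn) { return (insn >> 20) & 0x1f; }

/* An instruction whose two lowest bits are 0b11 is a full 32-bit
 * instruction; anything else is a 16-bit compressed one. */
static unsigned int insn_len(uint16_t first_parcel)
{
        return (first_parcel & 0x3) == 0x3 ? 4 : 2;
}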
-static inline u64 load_u64(const u64 *addr) +extern unsigned long get_f64_reg(unsigned long fp_reg); + +static unsigned long get_f64_rs(unsigned long insn, u8 fp_reg_offset, + struct pt_regs *regs) { - return load_u32((u32 *)addr) - + ((u64)load_u32((u32 *)addr + 1) << 32); + unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F; + unsigned long val; + + val = get_f64_reg(fp_reg); + regs->status |= SR_FS_DIRTY; + + return val; } -static inline void store_u64(u64 *addr, u64 val) +#endif + +extern unsigned long get_f32_reg(unsigned long fp_reg); + +static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset, + struct pt_regs *regs) { - store_u32((u32 *)addr, val); - store_u32((u32 *)addr + 1, val >> 32); + unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F; + unsigned long val; + + val = get_f32_reg(fp_reg); + regs->status |= SR_FS_DIRTY; + + return val; } + +#else /* CONFIG_FPU */ +static void set_f32_rd(unsigned long insn, struct pt_regs *regs, + unsigned long val) {} + +static void set_f64_rd(unsigned long insn, struct pt_regs *regs, u64 val) {} + +static unsigned long get_f64_rs(unsigned long insn, u8 fp_reg_offset, + struct pt_regs *regs) +{ + return 0; +} + +static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset, + struct pt_regs *regs) +{ + return 0; +} + #endif -static inline ulong get_insn(ulong mepc) +#define GET_F64_RS2(insn, regs) (get_f64_rs(insn, 20, regs)) +#define GET_F64_RS2C(insn, regs) (get_f64_rs(insn, 2, regs)) +#define GET_F64_RS2S(insn, regs) (get_f64_rs(RVC_RS2S(insn), 0, regs)) + +#define GET_F32_RS2(insn, regs) (get_f32_rs(insn, 20, regs)) +#define GET_F32_RS2C(insn, regs) (get_f32_rs(insn, 2, regs)) +#define GET_F32_RS2S(insn, regs) (get_f32_rs(RVC_RS2S(insn), 0, regs)) + +#ifdef CONFIG_RISCV_M_MODE +static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val) +{ + u8 val; + + asm volatile("lbu %0, %1" : "=&r" (val) : "m" (*addr)); + *r_val = val; + + return 0; +} + +static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val) +{ + asm volatile ("sb %0, %1\n" : : "r" (val), "m" (*addr)); + + return 0; +} + +static inline int get_insn(struct pt_regs *regs, ulong mepc, ulong *r_insn) { register ulong __mepc asm ("a2") = mepc; ulong val, rvc_mask = 3, tmp; @@ -226,23 +311,119 @@ static inline ulong get_insn(ulong mepc) : [addr] "r" (__mepc), [rvc_mask] "r" (rvc_mask), [xlen_minus_16] "i" (XLEN_MINUS_16)); - return val; + *r_insn = val; + + return 0; +} +#else +static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val) +{ + if (user_mode(regs)) { + return __get_user(*r_val, addr); + } else { + *r_val = *addr; + return 0; + } +} + +static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val) +{ + if (user_mode(regs)) { + return __put_user(val, addr); + } else { + *addr = val; + return 0; + } } +#define __read_insn(regs, insn, insn_addr) \ +({ \ + int __ret; \ + \ + if (user_mode(regs)) { \ + __ret = __get_user(insn, insn_addr); \ + } else { \ + insn = *insn_addr; \ + __ret = 0; \ + } \ + \ + __ret; \ +}) + +static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn) +{ + ulong insn = 0; + + if (epc & 0x2) { + ulong tmp = 0; + u16 __user *insn_addr = (u16 __user *)epc; + + if (__read_insn(regs, insn, insn_addr)) + return -EFAULT; + /* __get_user() uses regular "lw" which sign extend the loaded + * value make sure to clear higher order bits in case we "or" it + * below with the upper 16 bits half. 
+ */ + insn &= GENMASK(15, 0); + if ((insn & __INSN_LENGTH_MASK) != __INSN_LENGTH_32) { + *r_insn = insn; + return 0; + } + insn_addr++; + if (__read_insn(regs, tmp, insn_addr)) + return -EFAULT; + *r_insn = (tmp << 16) | insn; + + return 0; + } else { + u32 __user *insn_addr = (u32 __user *)epc; + + if (__read_insn(regs, insn, insn_addr)) + return -EFAULT; + if ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) { + *r_insn = insn; + return 0; + } + insn &= GENMASK(15, 0); + *r_insn = insn; + + return 0; + } +} +#endif + union reg_data { u8 data_bytes[8]; ulong data_ulong; u64 data_u64; }; +static bool unaligned_ctl __read_mostly; + +/* sysctl hooks */ +int unaligned_enabled __read_mostly = 1; /* Enabled by default */ + int handle_misaligned_load(struct pt_regs *regs) { union reg_data val; unsigned long epc = regs->epc; - unsigned long insn = get_insn(epc); - unsigned long addr = csr_read(mtval); + unsigned long insn; + unsigned long addr = regs->badaddr; int i, fp = 0, shift = 0, len = 0; + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); + + *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED; + + if (!unaligned_enabled) + return -1; + + if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS)) + return -1; + + if (get_insn(regs, epc, &insn)) + return -1; + regs->epc = 0; if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) { @@ -305,13 +486,21 @@ int handle_misaligned_load(struct pt_regs *regs) return -1; } + if (!IS_ENABLED(CONFIG_FPU) && fp) + return -EOPNOTSUPP; + val.data_u64 = 0; - for (i = 0; i < len; i++) - val.data_bytes[i] = load_u8((void *)(addr + i)); + for (i = 0; i < len; i++) { + if (load_u8(regs, (void *)(addr + i), &val.data_bytes[i])) + return -1; + } - if (fp) - return -1; - SET_RD(insn, regs, val.data_ulong << shift >> shift); + if (!fp) + SET_RD(insn, regs, val.data_ulong << shift >> shift); + else if (len == 8) + set_f64_rd(insn, regs, val.data_u64); + else + set_f32_rd(insn, regs, val.data_ulong); regs->epc = epc + INSN_LEN(insn); @@ -322,9 +511,20 @@ int handle_misaligned_store(struct pt_regs *regs) { union reg_data val; unsigned long epc = regs->epc; - unsigned long insn = get_insn(epc); - unsigned long addr = csr_read(mtval); - int i, len = 0; + unsigned long insn; + unsigned long addr = regs->badaddr; + int i, len = 0, fp = 0; + + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); + + if (!unaligned_enabled) + return -1; + + if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS)) + return -1; + + if (get_insn(regs, epc, &insn)) + return -1; regs->epc = 0; @@ -336,6 +536,14 @@ int handle_misaligned_store(struct pt_regs *regs) } else if ((insn & INSN_MASK_SD) == INSN_MATCH_SD) { len = 8; #endif + } else if ((insn & INSN_MASK_FSD) == INSN_MATCH_FSD) { + fp = 1; + len = 8; + val.data_u64 = GET_F64_RS2(insn, regs); + } else if ((insn & INSN_MASK_FSW) == INSN_MATCH_FSW) { + fp = 1; + len = 4; + val.data_ulong = GET_F32_RS2(insn, regs); } else if ((insn & INSN_MASK_SH) == INSN_MATCH_SH) { len = 2; #if defined(CONFIG_64BIT) @@ -354,15 +562,88 @@ int handle_misaligned_store(struct pt_regs *regs) ((insn >> SH_RD) & 0x1f)) { len = 4; val.data_ulong = GET_RS2C(insn, regs); + } else if ((insn & INSN_MASK_C_FSD) == INSN_MATCH_C_FSD) { + fp = 1; + len = 8; + val.data_u64 = GET_F64_RS2S(insn, regs); + } else if ((insn & INSN_MASK_C_FSDSP) == INSN_MATCH_C_FSDSP) { + fp = 1; + len = 8; + val.data_u64 = GET_F64_RS2C(insn, regs); +#if !defined(CONFIG_64BIT) + } else if ((insn & INSN_MASK_C_FSW) == 
INSN_MATCH_C_FSW) { + fp = 1; + len = 4; + val.data_ulong = GET_F32_RS2S(insn, regs); + } else if ((insn & INSN_MASK_C_FSWSP) == INSN_MATCH_C_FSWSP) { + fp = 1; + len = 4; + val.data_ulong = GET_F32_RS2C(insn, regs); +#endif } else { regs->epc = epc; return -1; } - for (i = 0; i < len; i++) - store_u8((void *)(addr + i), val.data_bytes[i]); + if (!IS_ENABLED(CONFIG_FPU) && fp) + return -EOPNOTSUPP; + + for (i = 0; i < len; i++) { + if (store_u8(regs, (void *)(addr + i), val.data_bytes[i])) + return -1; + } regs->epc = epc + INSN_LEN(insn); return 0; } + +bool check_unaligned_access_emulated(int cpu) +{ + long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu); + unsigned long tmp_var, tmp_val; + bool misaligned_emu_detected; + + *mas_ptr = RISCV_HWPROBE_MISALIGNED_UNKNOWN; + + __asm__ __volatile__ ( + " "REG_L" %[tmp], 1(%[ptr])\n" + : [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory"); + + misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_EMULATED); + /* + * If unaligned_ctl is already set, this means that we detected that all + * CPUS uses emulated misaligned access at boot time. If that changed + * when hotplugging the new cpu, this is something we don't handle. + */ + if (unlikely(unaligned_ctl && !misaligned_emu_detected)) { + pr_crit("CPU misaligned accesses non homogeneous (expected all emulated)\n"); + while (true) + cpu_relax(); + } + + return misaligned_emu_detected; +} + +void unaligned_emulation_finish(void) +{ + int cpu; + + /* + * We can only support PR_UNALIGN controls if all CPUs have misaligned + * accesses emulated since tasks requesting such control can run on any + * CPU. + */ + for_each_present_cpu(cpu) { + if (per_cpu(misaligned_access_speed, cpu) != + RISCV_HWPROBE_MISALIGNED_EMULATED) { + return; + } + } + unaligned_ctl = true; +} + +bool unaligned_ctl_available(void) +{ + return unaligned_ctl; +} diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index e8aa7c380007..9b517fe1b8a8 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -36,7 +36,7 @@ CPPFLAGS_vdso.lds += -DHAS_VGETTIMEOFDAY endif # Disable -pg to prevent insert call site -CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) # Disable profiling and instrumentation for VDSO code GCOV_PROFILE := n diff --git a/arch/riscv/kernel/vdso/flush_icache.S b/arch/riscv/kernel/vdso/flush_icache.S index 82f97d67c23e..8f884227e8bc 100644 --- a/arch/riscv/kernel/vdso/flush_icache.S +++ b/arch/riscv/kernel/vdso/flush_icache.S @@ -8,7 +8,7 @@ .text /* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */ -ENTRY(__vdso_flush_icache) +SYM_FUNC_START(__vdso_flush_icache) .cfi_startproc #ifdef CONFIG_SMP li a7, __NR_riscv_flush_icache @@ -19,4 +19,4 @@ ENTRY(__vdso_flush_icache) #endif ret .cfi_endproc -ENDPROC(__vdso_flush_icache) +SYM_FUNC_END(__vdso_flush_icache) diff --git a/arch/riscv/kernel/vdso/getcpu.S b/arch/riscv/kernel/vdso/getcpu.S index bb0c05e2ffba..9c1bd531907f 100644 --- a/arch/riscv/kernel/vdso/getcpu.S +++ b/arch/riscv/kernel/vdso/getcpu.S @@ -8,11 +8,11 @@ .text /* int __vdso_getcpu(unsigned *cpu, unsigned *node, void *unused); */ -ENTRY(__vdso_getcpu) +SYM_FUNC_START(__vdso_getcpu) .cfi_startproc /* For now, just do the syscall. 
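unaligned_ctl above is what makes the PR_SET_UNALIGN prctl meaningful on RISC-V: it is only enabled once every present CPU is known to emulate misaligned accesses. From user space a task opts into strict alignment like this; a usage sketch that assumes such a kernel, and on other configurations the call simply fails:

#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
        /* Request SIGBUS on misaligned accesses instead of
         * transparent in-kernel emulation. */
        if (prctl(PR_SET_UNALIGN, PR_UNALIGN_SIGBUS, 0, 0, 0))
                perror("PR_SET_UNALIGN");
        return 0;
}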
*/ li a7, __NR_getcpu ecall ret .cfi_endproc -ENDPROC(__vdso_getcpu) +SYM_FUNC_END(__vdso_getcpu) diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c index d40bec6ac078..cadf725ef798 100644 --- a/arch/riscv/kernel/vdso/hwprobe.c +++ b/arch/riscv/kernel/vdso/hwprobe.c @@ -37,7 +37,7 @@ int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, /* This is something we can handle, fill out the pairs. */ while (p < end) { - if (p->key <= RISCV_HWPROBE_MAX_KEY) { + if (riscv_hwprobe_key_is_valid(p->key)) { p->value = avd->all_cpu_hwprobe_values[p->key]; } else { diff --git a/arch/riscv/kernel/vdso/rt_sigreturn.S b/arch/riscv/kernel/vdso/rt_sigreturn.S index 10438c7c626a..3dc022aa8931 100644 --- a/arch/riscv/kernel/vdso/rt_sigreturn.S +++ b/arch/riscv/kernel/vdso/rt_sigreturn.S @@ -7,10 +7,10 @@ #include <asm/unistd.h> .text -ENTRY(__vdso_rt_sigreturn) +SYM_FUNC_START(__vdso_rt_sigreturn) .cfi_startproc .cfi_signal_frame li a7, __NR_rt_sigreturn ecall .cfi_endproc -ENDPROC(__vdso_rt_sigreturn) +SYM_FUNC_END(__vdso_rt_sigreturn) diff --git a/arch/riscv/kernel/vdso/sys_hwprobe.S b/arch/riscv/kernel/vdso/sys_hwprobe.S index 4e704146c77a..77e57f830521 100644 --- a/arch/riscv/kernel/vdso/sys_hwprobe.S +++ b/arch/riscv/kernel/vdso/sys_hwprobe.S @@ -5,11 +5,11 @@ #include <asm/unistd.h> .text -ENTRY(riscv_hwprobe) +SYM_FUNC_START(riscv_hwprobe) .cfi_startproc li a7, __NR_riscv_hwprobe ecall ret .cfi_endproc -ENDPROC(riscv_hwprobe) +SYM_FUNC_END(riscv_hwprobe) diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 82ce64900f3d..cbe2a179331d 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -23,35 +23,31 @@ SECTIONS .gnu.version_d : { *(.gnu.version_d) } .gnu.version_r : { *(.gnu.version_r) } - .note : { *(.note.*) } :text :note .dynamic : { *(.dynamic) } :text :dynamic + .rodata : { + *(.rodata .rodata.* .gnu.linkonce.r.*) + *(.got.plt) *(.got) + *(.data .data.* .gnu.linkonce.d.*) + *(.dynbss) + *(.bss .bss.* .gnu.linkonce.b.*) + } + + .note : { *(.note.*) } :text :note + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr .eh_frame : { KEEP (*(.eh_frame)) } :text - .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } - /* - * This linker script is used both with -r and with -shared. - * For the layouts to match, we need to skip more than enough - * space for the dynamic symbol table, etc. If this amount is - * insufficient, ld -shared will error; simply increase it here. + * Text is well-separated from actual data: there's plenty of + * stuff that isn't used at runtime in between. */ - . = 0x800; + . = ALIGN(16); .text : { *(.text .text.*) } :text . 
= ALIGN(4); .alternative : { - __alt_start = .; *(.alternative) - __alt_end = .; - } - - .data : { - *(.got.plt) *(.got) - *(.data .data.* .gnu.linkonce.d.*) - *(.dynbss) - *(.bss .bss.* .gnu.linkonce.b.*) } } diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index 74bb27440527..a944294f6f23 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -14,7 +14,7 @@ #include <linux/kvm_host.h> #include <linux/percpu.h> #include <linux/spinlock.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #include <asm/kvm_aia_imsic.h> struct aia_hgei_control { diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 48ae0d4b3932..225a435d9c9a 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -11,7 +11,7 @@ #include <linux/module.h> #include <linux/kvm_host.h> #include <asm/csr.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #include <asm/sbi.h> long kvm_arch_dev_ioctl(struct file *filp, diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c index 44bc324aeeb0..23c0e82b5103 100644 --- a/arch/riscv/kvm/tlb.c +++ b/arch/riscv/kvm/tlb.c @@ -12,7 +12,7 @@ #include <linux/kvm_host.h> #include <asm/cacheflush.h> #include <asm/csr.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #include <asm/insn-def.h> #define has_svinval() riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL) diff --git a/arch/riscv/kvm/vcpu_fp.c b/arch/riscv/kvm/vcpu_fp.c index 08ba48a395aa..030904d82b58 100644 --- a/arch/riscv/kvm/vcpu_fp.c +++ b/arch/riscv/kvm/vcpu_fp.c @@ -11,7 +11,7 @@ #include <linux/err.h> #include <linux/kvm_host.h> #include <linux/uaccess.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #ifdef CONFIG_FPU void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu) diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index c6ebce6126b5..f8c9fa0c03c5 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -13,7 +13,7 @@ #include <linux/uaccess.h> #include <linux/kvm_host.h> #include <asm/cacheflush.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #include <asm/kvm_vcpu_vector.h> #include <asm/vector.h> diff --git a/arch/riscv/kvm/vcpu_vector.c b/arch/riscv/kvm/vcpu_vector.c index b430cbb69521..b339a2682f25 100644 --- a/arch/riscv/kvm/vcpu_vector.c +++ b/arch/riscv/kvm/vcpu_vector.c @@ -11,7 +11,7 @@ #include <linux/err.h> #include <linux/kvm_host.h> #include <linux/uaccess.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #include <asm/kvm_vcpu_vector.h> #include <asm/vector.h> diff --git a/arch/riscv/lib/clear_page.S b/arch/riscv/lib/clear_page.S index d7a256eb53f4..b22de1231144 100644 --- a/arch/riscv/lib/clear_page.S +++ b/arch/riscv/lib/clear_page.S @@ -29,41 +29,41 @@ SYM_FUNC_START(clear_page) lw a1, riscv_cboz_block_size add a2, a0, a2 .Lzero_loop: - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 CBOZ_ALT(11, "bltu a0, a2, .Lzero_loop; ret", "nop; nop") - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 CBOZ_ALT(10, "bltu a0, a2, .Lzero_loop; ret", "nop; nop") - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 CBOZ_ALT(9, "bltu a0, a2, .Lzero_loop; ret", "nop; nop") - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 CBOZ_ALT(8, "bltu a0, a2, .Lzero_loop; ret", "nop; nop") - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, 
a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 bltu a0, a2, .Lzero_loop ret diff --git a/arch/riscv/lib/memcpy.S b/arch/riscv/lib/memcpy.S index 1a40d01a9543..44e009ec5fef 100644 --- a/arch/riscv/lib/memcpy.S +++ b/arch/riscv/lib/memcpy.S @@ -7,8 +7,7 @@ #include <asm/asm.h> /* void *memcpy(void *, const void *, size_t) */ -ENTRY(__memcpy) -WEAK(memcpy) +SYM_FUNC_START(__memcpy) move t6, a0 /* Preserve return value */ /* Defer to byte-oriented copy for small sizes */ @@ -105,6 +104,7 @@ WEAK(memcpy) bltu a1, a3, 5b 6: ret -END(__memcpy) +SYM_FUNC_END(__memcpy) +SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) SYM_FUNC_ALIAS(__pi_memcpy, __memcpy) SYM_FUNC_ALIAS(__pi___memcpy, __memcpy) diff --git a/arch/riscv/lib/memmove.S b/arch/riscv/lib/memmove.S index 838ff2022fe3..cb3e2e7ef0ba 100644 --- a/arch/riscv/lib/memmove.S +++ b/arch/riscv/lib/memmove.S @@ -7,7 +7,6 @@ #include <asm/asm.h> SYM_FUNC_START(__memmove) -SYM_FUNC_START_WEAK(memmove) /* * Returns * a0 - dest @@ -26,8 +25,8 @@ SYM_FUNC_START_WEAK(memmove) */ /* Return if nothing to do */ - beq a0, a1, return_from_memmove - beqz a2, return_from_memmove + beq a0, a1, .Lreturn_from_memmove + beqz a2, .Lreturn_from_memmove /* * Register Uses @@ -60,7 +59,7 @@ SYM_FUNC_START_WEAK(memmove) * small enough not to bother. */ andi t0, a2, -(2 * SZREG) - beqz t0, byte_copy + beqz t0, .Lbyte_copy /* * Now solve for t5 and t6. @@ -87,14 +86,14 @@ SYM_FUNC_START_WEAK(memmove) */ xor t0, a0, a1 andi t1, t0, (SZREG - 1) - beqz t1, coaligned_copy + beqz t1, .Lcoaligned_copy /* Fall through to misaligned fixup copy */ -misaligned_fixup_copy: - bltu a1, a0, misaligned_fixup_copy_reverse +.Lmisaligned_fixup_copy: + bltu a1, a0, .Lmisaligned_fixup_copy_reverse -misaligned_fixup_copy_forward: - jal t0, byte_copy_until_aligned_forward +.Lmisaligned_fixup_copy_forward: + jal t0, .Lbyte_copy_until_aligned_forward andi a5, a1, (SZREG - 1) /* Find the alignment offset of src (a1) */ slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */ @@ -153,10 +152,10 @@ misaligned_fixup_copy_forward: mv t3, t6 /* Fix the dest pointer in case the loop was broken */ add a1, t3, a5 /* Restore the src pointer */ - j byte_copy_forward /* Copy any remaining bytes */ + j .Lbyte_copy_forward /* Copy any remaining bytes */ -misaligned_fixup_copy_reverse: - jal t0, byte_copy_until_aligned_reverse +.Lmisaligned_fixup_copy_reverse: + jal t0, .Lbyte_copy_until_aligned_reverse andi a5, a4, (SZREG - 1) /* Find the alignment offset of src (a4) */ slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */ @@ -215,18 +214,18 @@ misaligned_fixup_copy_reverse: mv t4, t5 /* Fix the dest pointer in case the loop was broken */ add a4, t4, a5 /* Restore the src pointer */ - j byte_copy_reverse /* Copy any remaining bytes */ + j .Lbyte_copy_reverse /* Copy any remaining bytes */ /* * Simple copy loops for SZREG co-aligned memory locations. * These also make calls to do byte copies for any unaligned * data at their terminations. 
*/ -coaligned_copy: - bltu a1, a0, coaligned_copy_reverse +.Lcoaligned_copy: + bltu a1, a0, .Lcoaligned_copy_reverse -coaligned_copy_forward: - jal t0, byte_copy_until_aligned_forward +.Lcoaligned_copy_forward: + jal t0, .Lbyte_copy_until_aligned_forward 1: REG_L t1, ( 0 * SZREG)(a1) @@ -235,10 +234,10 @@ coaligned_copy_forward: REG_S t1, (-1 * SZREG)(t3) bne t3, t6, 1b - j byte_copy_forward /* Copy any remaining bytes */ + j .Lbyte_copy_forward /* Copy any remaining bytes */ -coaligned_copy_reverse: - jal t0, byte_copy_until_aligned_reverse +.Lcoaligned_copy_reverse: + jal t0, .Lbyte_copy_until_aligned_reverse 1: REG_L t1, (-1 * SZREG)(a4) @@ -247,7 +246,7 @@ coaligned_copy_reverse: REG_S t1, ( 0 * SZREG)(t4) bne t4, t5, 1b - j byte_copy_reverse /* Copy any remaining bytes */ + j .Lbyte_copy_reverse /* Copy any remaining bytes */ /* * These are basically sub-functions within the function. They @@ -258,7 +257,7 @@ coaligned_copy_reverse: * up from where they were left and we avoid code duplication * without any overhead except the call in and return jumps. */ -byte_copy_until_aligned_forward: +.Lbyte_copy_until_aligned_forward: beq t3, t5, 2f 1: lb t1, 0(a1) @@ -269,7 +268,7 @@ byte_copy_until_aligned_forward: 2: jalr zero, 0x0(t0) /* Return to multibyte copy loop */ -byte_copy_until_aligned_reverse: +.Lbyte_copy_until_aligned_reverse: beq t4, t6, 2f 1: lb t1, -1(a4) @@ -285,10 +284,10 @@ byte_copy_until_aligned_reverse: * These will byte copy until they reach the end of data to copy. * At that point, they will call to return from memmove. */ -byte_copy: - bltu a1, a0, byte_copy_reverse +.Lbyte_copy: + bltu a1, a0, .Lbyte_copy_reverse -byte_copy_forward: +.Lbyte_copy_forward: beq t3, t4, 2f 1: lb t1, 0(a1) @@ -299,7 +298,7 @@ byte_copy_forward: 2: ret -byte_copy_reverse: +.Lbyte_copy_reverse: beq t4, t3, 2f 1: lb t1, -1(a4) @@ -309,10 +308,10 @@ byte_copy_reverse: bne t4, t3, 1b 2: -return_from_memmove: +.Lreturn_from_memmove: ret -SYM_FUNC_END(memmove) SYM_FUNC_END(__memmove) +SYM_FUNC_ALIAS_WEAK(memmove, __memmove) SYM_FUNC_ALIAS(__pi_memmove, __memmove) SYM_FUNC_ALIAS(__pi___memmove, __memmove) diff --git a/arch/riscv/lib/memset.S b/arch/riscv/lib/memset.S index 34c5360c6705..35f358e70bdb 100644 --- a/arch/riscv/lib/memset.S +++ b/arch/riscv/lib/memset.S @@ -8,8 +8,7 @@ #include <asm/asm.h> /* void *memset(void *, int, size_t) */ -ENTRY(__memset) -WEAK(memset) +SYM_FUNC_START(__memset) move t0, a0 /* Preserve return value */ /* Defer to byte-oriented fill for small sizes */ @@ -110,4 +109,5 @@ WEAK(memset) bltu t0, a3, 5b 6: ret -END(__memset) +SYM_FUNC_END(__memset) +SYM_FUNC_ALIAS_WEAK(memset, __memset) diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index 09b47ebacf2e..3ab438f30d13 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -10,8 +10,7 @@ _asm_extable 100b, \lbl .endm -ENTRY(__asm_copy_to_user) -ENTRY(__asm_copy_from_user) +SYM_FUNC_START(__asm_copy_to_user) /* Enable access to user memory */ li t6, SR_SUM @@ -181,13 +180,13 @@ ENTRY(__asm_copy_from_user) csrc CSR_STATUS, t6 sub a0, t5, a0 ret -ENDPROC(__asm_copy_to_user) -ENDPROC(__asm_copy_from_user) +SYM_FUNC_END(__asm_copy_to_user) EXPORT_SYMBOL(__asm_copy_to_user) +SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user) EXPORT_SYMBOL(__asm_copy_from_user) -ENTRY(__clear_user) +SYM_FUNC_START(__clear_user) /* Enable access to user memory */ li t6, SR_SUM @@ -233,5 +232,5 @@ ENTRY(__clear_user) csrc CSR_STATUS, t6 sub a0, a3, a0 ret -ENDPROC(__clear_user) +SYM_FUNC_END(__clear_user) 
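All of these string and uaccess routines now follow the same linkage pattern: the body is defined once under a double-underscore name and the standard symbol is attached afterwards as a (possibly weak) alias, instead of nesting ENTRY/WEAK labels inside one body. A toy C analog using GNU attributes; the kernel expresses this with SYM_FUNC_ALIAS_WEAK in assembly, not C:

void *__memcpy(void *dst, const void *src, unsigned long n)
{
        char *d = dst;
        const char *s = src;

        while (n--)
                *d++ = *s++;
        return dst;
}

/* Weak alias: an instrumented build (KASAN and friends) can supply
 * its own memcpy, which then takes precedence at link time while
 * __memcpy remains callable directly. */
void *memcpy(void *dst, const void *src, unsigned long n)
        __attribute__((weak, alias("__memcpy")));

The same split is why SYM_FUNC_END now closes only __memmove and the memmove alias is declared afterwards: an alias is just a second name for the same address, which keeps each function's size annotation well-defined.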
EXPORT_SYMBOL(__clear_user) diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 9c454f90fd3d..3a4dfc8babcf 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -36,3 +36,4 @@ endif obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_RISCV_DMA_NONCOHERENT) += dma-noncoherent.o +obj-$(CONFIG_RISCV_NONSTANDARD_CACHE_OPS) += cache-ops.o diff --git a/arch/riscv/mm/cache-ops.c b/arch/riscv/mm/cache-ops.c new file mode 100644 index 000000000000..a993ad11d0ec --- /dev/null +++ b/arch/riscv/mm/cache-ops.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + */ + +#include <asm/dma-noncoherent.h> + +struct riscv_nonstd_cache_ops noncoherent_cache_ops __ro_after_init; + +void +riscv_noncoherent_register_cache_ops(const struct riscv_nonstd_cache_ops *ops) +{ + if (!ops) + return; + noncoherent_cache_ops = *ops; +} +EXPORT_SYMBOL_GPL(riscv_noncoherent_register_cache_ops); diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index f1387272a551..55a34f2020a8 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -3,7 +3,9 @@ * Copyright (C) 2017 SiFive */ +#include <linux/acpi.h> #include <linux/of.h> +#include <asm/acpi.h> #include <asm/cacheflush.h> #ifdef CONFIG_SMP @@ -124,13 +126,24 @@ void __init riscv_init_cbo_blocksizes(void) unsigned long cbom_hartid, cboz_hartid; u32 cbom_block_size = 0, cboz_block_size = 0; struct device_node *node; + struct acpi_table_header *rhct; + acpi_status status; + + if (acpi_disabled) { + for_each_of_cpu_node(node) { + /* set block-size for cbom and/or cboz extension if available */ + cbo_get_block_size(node, "riscv,cbom-block-size", + &cbom_block_size, &cbom_hartid); + cbo_get_block_size(node, "riscv,cboz-block-size", + &cboz_block_size, &cboz_hartid); + } + } else { + status = acpi_get_table(ACPI_SIG_RHCT, 0, &rhct); + if (ACPI_FAILURE(status)) + return; - for_each_of_cpu_node(node) { - /* set block-size for cbom and/or cboz extension if available */ - cbo_get_block_size(node, "riscv,cbom-block-size", - &cbom_block_size, &cbom_hartid); - cbo_get_block_size(node, "riscv,cboz-block-size", - &cboz_block_size, &cboz_hartid); + acpi_get_cbo_block_size(rhct, &cbom_block_size, &cboz_block_size, NULL); + acpi_put_table((struct acpi_table_header *)rhct); } if (cbom_block_size) diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c index b76e7e192eb1..4e4e469b8dd6 100644 --- a/arch/riscv/mm/dma-noncoherent.c +++ b/arch/riscv/mm/dma-noncoherent.c @@ -15,12 +15,6 @@ static bool noncoherent_supported __ro_after_init; int dma_cache_alignment __ro_after_init = ARCH_DMA_MINALIGN; EXPORT_SYMBOL_GPL(dma_cache_alignment); -struct riscv_nonstd_cache_ops noncoherent_cache_ops __ro_after_init = { - .wback = NULL, - .inv = NULL, - .wback_inv = NULL, -}; - static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size) { void *vaddr = phys_to_virt(paddr); @@ -31,7 +25,7 @@ static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size) return; } #endif - ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size); + ALT_CMO_OP(CLEAN, vaddr, size, riscv_cbom_block_size); } static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size) @@ -45,7 +39,7 @@ static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size) } #endif - ALT_CMO_OP(inval, vaddr, size, riscv_cbom_block_size); + ALT_CMO_OP(INVAL, vaddr, size, riscv_cbom_block_size); } static inline void arch_dma_cache_wback_inv(phys_addr_t 
paddr, size_t size) @@ -59,7 +53,7 @@ static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size) } #endif - ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size); + ALT_CMO_OP(FLUSH, vaddr, size, riscv_cbom_block_size); } static inline bool arch_sync_dma_clean_before_fromdevice(void) @@ -131,7 +125,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size) } #endif - ALT_CMO_OP(flush, flush_addr, size, riscv_cbom_block_size); + ALT_CMO_OP(FLUSH, flush_addr, size, riscv_cbom_block_size); } void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, @@ -162,12 +156,3 @@ void __init riscv_set_dma_cache_alignment(void) if (!noncoherent_supported) dma_cache_alignment = 1; } - -void riscv_noncoherent_register_cache_ops(const struct riscv_nonstd_cache_ops *ops) -{ - if (!ops) - return; - - noncoherent_cache_ops = *ops; -} -EXPORT_SYMBOL_GPL(riscv_noncoherent_register_cache_ops); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index d9a4e8702864..2e011cbddf3a 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -49,10 +49,12 @@ u64 satp_mode __ro_after_init = SATP_MODE_32; #endif EXPORT_SYMBOL(satp_mode); +#ifdef CONFIG_64BIT bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL); bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL); EXPORT_SYMBOL(pgtable_l4_enabled); EXPORT_SYMBOL(pgtable_l5_enabled); +#endif phys_addr_t phys_ram_base __ro_after_init; EXPORT_SYMBOL(phys_ram_base); @@ -664,16 +666,16 @@ void __init create_pgd_mapping(pgd_t *pgdp, static uintptr_t __init best_map_size(phys_addr_t pa, uintptr_t va, phys_addr_t size) { - if (!(pa & (PGDIR_SIZE - 1)) && !(va & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE) - return PGDIR_SIZE; - - if (!(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE) + if (pgtable_l5_enabled && + !(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE) return P4D_SIZE; - if (!(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE) + if (pgtable_l4_enabled && + !(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE) return PUD_SIZE; - if (!(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE) + if (IS_ENABLED(CONFIG_64BIT) && + !(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE) return PMD_SIZE; return PAGE_SIZE; diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c index 161d0b34c2cb..fc5fc4f785c4 100644 --- a/arch/riscv/mm/pageattr.c +++ b/arch/riscv/mm/pageattr.c @@ -5,6 +5,7 @@ #include <linux/pagewalk.h> #include <linux/pgtable.h> +#include <linux/vmalloc.h> #include <asm/tlbflush.h> #include <asm/bitops.h> #include <asm/set_memory.h> @@ -25,19 +26,6 @@ static unsigned long set_pageattr_masks(unsigned long val, struct mm_walk *walk) return new_val; } -static int pageattr_pgd_entry(pgd_t *pgd, unsigned long addr, - unsigned long next, struct mm_walk *walk) -{ - pgd_t val = READ_ONCE(*pgd); - - if (pgd_leaf(val)) { - val = __pgd(set_pageattr_masks(pgd_val(val), walk)); - set_pgd(pgd, val); - } - - return 0; -} - static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr, unsigned long next, struct mm_walk *walk) { @@ -96,7 +84,6 @@ static int pageattr_pte_hole(unsigned long addr, unsigned long next, } static const struct mm_walk_ops pageattr_ops = { - .pgd_entry = pageattr_pgd_entry, .p4d_entry = pageattr_p4d_entry, .pud_entry = pageattr_pud_entry, .pmd_entry = pageattr_pmd_entry, @@ -105,12 +92,181 @@ static const struct mm_walk_ops pageattr_ops = { 
.walk_lock = PGWALK_RDLOCK, }; +#ifdef CONFIG_64BIT +static int __split_linear_mapping_pmd(pud_t *pudp, + unsigned long vaddr, unsigned long end) +{ + pmd_t *pmdp; + unsigned long next; + + pmdp = pmd_offset(pudp, vaddr); + + do { + next = pmd_addr_end(vaddr, end); + + if (next - vaddr >= PMD_SIZE && + vaddr <= (vaddr & PMD_MASK) && end >= next) + continue; + + if (pmd_leaf(*pmdp)) { + struct page *pte_page; + unsigned long pfn = _pmd_pfn(*pmdp); + pgprot_t prot = __pgprot(pmd_val(*pmdp) & ~_PAGE_PFN_MASK); + pte_t *ptep_new; + int i; + + pte_page = alloc_page(GFP_KERNEL); + if (!pte_page) + return -ENOMEM; + + ptep_new = (pte_t *)page_address(pte_page); + for (i = 0; i < PTRS_PER_PTE; ++i, ++ptep_new) + set_pte(ptep_new, pfn_pte(pfn + i, prot)); + + smp_wmb(); + + set_pmd(pmdp, pfn_pmd(page_to_pfn(pte_page), PAGE_TABLE)); + } + } while (pmdp++, vaddr = next, vaddr != end); + + return 0; +} + +static int __split_linear_mapping_pud(p4d_t *p4dp, + unsigned long vaddr, unsigned long end) +{ + pud_t *pudp; + unsigned long next; + int ret; + + pudp = pud_offset(p4dp, vaddr); + + do { + next = pud_addr_end(vaddr, end); + + if (next - vaddr >= PUD_SIZE && + vaddr <= (vaddr & PUD_MASK) && end >= next) + continue; + + if (pud_leaf(*pudp)) { + struct page *pmd_page; + unsigned long pfn = _pud_pfn(*pudp); + pgprot_t prot = __pgprot(pud_val(*pudp) & ~_PAGE_PFN_MASK); + pmd_t *pmdp_new; + int i; + + pmd_page = alloc_page(GFP_KERNEL); + if (!pmd_page) + return -ENOMEM; + + pmdp_new = (pmd_t *)page_address(pmd_page); + for (i = 0; i < PTRS_PER_PMD; ++i, ++pmdp_new) + set_pmd(pmdp_new, + pfn_pmd(pfn + ((i * PMD_SIZE) >> PAGE_SHIFT), prot)); + + smp_wmb(); + + set_pud(pudp, pfn_pud(page_to_pfn(pmd_page), PAGE_TABLE)); + } + + ret = __split_linear_mapping_pmd(pudp, vaddr, next); + if (ret) + return ret; + } while (pudp++, vaddr = next, vaddr != end); + + return 0; +} + +static int __split_linear_mapping_p4d(pgd_t *pgdp, + unsigned long vaddr, unsigned long end) +{ + p4d_t *p4dp; + unsigned long next; + int ret; + + p4dp = p4d_offset(pgdp, vaddr); + + do { + next = p4d_addr_end(vaddr, end); + + /* + * If [vaddr; end] contains [vaddr & P4D_MASK; next], we don't + * need to split, we'll change the protections on the whole P4D. + */ + if (next - vaddr >= P4D_SIZE && + vaddr <= (vaddr & P4D_MASK) && end >= next) + continue; + + if (p4d_leaf(*p4dp)) { + struct page *pud_page; + unsigned long pfn = _p4d_pfn(*p4dp); + pgprot_t prot = __pgprot(p4d_val(*p4dp) & ~_PAGE_PFN_MASK); + pud_t *pudp_new; + int i; + + pud_page = alloc_page(GFP_KERNEL); + if (!pud_page) + return -ENOMEM; + + /* + * Fill the pud level with leaf puds that have the same + * protections as the leaf p4d. + */ + pudp_new = (pud_t *)page_address(pud_page); + for (i = 0; i < PTRS_PER_PUD; ++i, ++pudp_new) + set_pud(pudp_new, + pfn_pud(pfn + ((i * PUD_SIZE) >> PAGE_SHIFT), prot)); + + /* + * Make sure the pud filling is not reordered with the + * p4d store which could result in seeing a partially + * filled pud level. 
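Each split helper above follows the same publish-after-initialize protocol: fully populate the new lower-level table, issue smp_wmb(), and only then store the entry that makes the table reachable, so a concurrent walker can never observe a half-filled level. A generic user-space analog using C11 release semantics; the kernel relies on smp_wmb() plus the page-table store rather than an atomic:

#include <stdatomic.h>
#include <stddef.h>

struct level { long entries[512]; };

static _Atomic(struct level *) visible;

static void publish(struct level *newl)
{
        for (size_t i = 0; i < 512; i++)
                newl->entries[i] = (long)i;     /* fill completely first */

        /* Release store: any reader that observes the pointer also
         * observes every entry written above. */
        atomic_store_explicit(&visible, newl, memory_order_release);
}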
+ */ + smp_wmb(); + + set_p4d(p4dp, pfn_p4d(page_to_pfn(pud_page), PAGE_TABLE)); + } + + ret = __split_linear_mapping_pud(p4dp, vaddr, next); + if (ret) + return ret; + } while (p4dp++, vaddr = next, vaddr != end); + + return 0; +} + +static int __split_linear_mapping_pgd(pgd_t *pgdp, + unsigned long vaddr, + unsigned long end) +{ + unsigned long next; + int ret; + + do { + next = pgd_addr_end(vaddr, end); + /* We never use PGD mappings for the linear mapping */ + ret = __split_linear_mapping_p4d(pgdp, vaddr, next); + if (ret) + return ret; + } while (pgdp++, vaddr = next, vaddr != end); + + return 0; +} + +static int split_linear_mapping(unsigned long start, unsigned long end) +{ + return __split_linear_mapping_pgd(pgd_offset_k(start), start, end); +} +#endif /* CONFIG_64BIT */ + static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask, pgprot_t clear_mask) { int ret; unsigned long start = addr; unsigned long end = start + PAGE_SIZE * numpages; + unsigned long __maybe_unused lm_start; + unsigned long __maybe_unused lm_end; struct pageattr_masks masks = { .set_mask = set_mask, .clear_mask = clear_mask @@ -120,11 +276,67 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask, return 0; mmap_write_lock(&init_mm); + +#ifdef CONFIG_64BIT + /* + * We are about to change the permissions of a kernel mapping, we must + * apply the same changes to its linear mapping alias, which may imply + * splitting a huge mapping. + */ + + if (is_vmalloc_or_module_addr((void *)start)) { + struct vm_struct *area = NULL; + int i, page_start; + + area = find_vm_area((void *)start); + page_start = (start - (unsigned long)area->addr) >> PAGE_SHIFT; + + for (i = page_start; i < page_start + numpages; ++i) { + lm_start = (unsigned long)page_address(area->pages[i]); + lm_end = lm_start + PAGE_SIZE; + + ret = split_linear_mapping(lm_start, lm_end); + if (ret) + goto unlock; + + ret = walk_page_range_novma(&init_mm, lm_start, lm_end, + &pageattr_ops, NULL, &masks); + if (ret) + goto unlock; + } + } else if (is_kernel_mapping(start) || is_linear_mapping(start)) { + lm_start = (unsigned long)lm_alias(start); + lm_end = (unsigned long)lm_alias(end); + + ret = split_linear_mapping(lm_start, lm_end); + if (ret) + goto unlock; + + ret = walk_page_range_novma(&init_mm, lm_start, lm_end, + &pageattr_ops, NULL, &masks); + if (ret) + goto unlock; + } + ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL, &masks); + +unlock: + mmap_write_unlock(&init_mm); + + /* + * We can't use flush_tlb_kernel_range() here as we may have split a + * hugepage that is larger than that, so let's flush everything. 
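A caller-level view of what __set_memory() now guarantees: changing permissions on a vmalloc'ed or kernel range also splits and updates the linear-map alias of every backing page before the TLB flush. A kernel-context usage sketch with a hypothetical caller; set_memory_ro() and vmalloc() are the real APIs:

#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <asm/set_memory.h>

static int protect_one_page(void)
{
        void *buf = vmalloc(PAGE_SIZE);

        if (!buf)
                return -ENOMEM;

        /* Also remaps the page's linear-map alias read-only. */
        return set_memory_ro((unsigned long)buf, 1);
}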
+ */ + flush_tlb_all(); +#else + ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL, + &masks); + mmap_write_unlock(&init_mm); flush_tlb_kernel_range(start, end); +#endif return ret; } @@ -159,36 +371,14 @@ int set_memory_nx(unsigned long addr, int numpages) int set_direct_map_invalid_noflush(struct page *page) { - int ret; - unsigned long start = (unsigned long)page_address(page); - unsigned long end = start + PAGE_SIZE; - struct pageattr_masks masks = { - .set_mask = __pgprot(0), - .clear_mask = __pgprot(_PAGE_PRESENT) - }; - - mmap_read_lock(&init_mm); - ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); - mmap_read_unlock(&init_mm); - - return ret; + return __set_memory((unsigned long)page_address(page), 1, + __pgprot(0), __pgprot(_PAGE_PRESENT)); } int set_direct_map_default_noflush(struct page *page) { - int ret; - unsigned long start = (unsigned long)page_address(page); - unsigned long end = start + PAGE_SIZE; - struct pageattr_masks masks = { - .set_mask = PAGE_KERNEL, - .clear_mask = __pgprot(0) - }; - - mmap_read_lock(&init_mm); - ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); - mmap_read_unlock(&init_mm); - - return ret; + return __set_memory((unsigned long)page_address(page), 1, + PAGE_KERNEL, __pgprot(0)); } #ifdef CONFIG_DEBUG_PAGEALLOC diff --git a/arch/riscv/mm/pmem.c b/arch/riscv/mm/pmem.c index c5fc5ec96f6d..370a422ede11 100644 --- a/arch/riscv/mm/pmem.c +++ b/arch/riscv/mm/pmem.c @@ -17,7 +17,7 @@ void arch_wb_cache_pmem(void *addr, size_t size) return; } #endif - ALT_CMO_OP(clean, addr, size, riscv_cbom_block_size); + ALT_CMO_OP(CLEAN, addr, size, riscv_cbom_block_size); } EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); @@ -29,6 +29,6 @@ void arch_invalidate_pmem(void *addr, size_t size) return; } #endif - ALT_CMO_OP(inval, addr, size, riscv_cbom_block_size); + ALT_CMO_OP(INVAL, addr, size, riscv_cbom_block_size); } EXPORT_SYMBOL_GPL(arch_invalidate_pmem); diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index 20a9f991a6d7..657c27bc07a7 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -129,55 +129,55 @@ static struct ptd_mm_info efi_ptd_info = { /* Page Table Entry */ struct prot_bits { u64 mask; - u64 val; const char *set; const char *clear; }; static const struct prot_bits pte_bits[] = { { +#ifdef CONFIG_64BIT + .mask = _PAGE_NAPOT, + .set = "N", + .clear = ".", + }, { + .mask = _PAGE_MTMASK_SVPBMT, + .set = "MT(%s)", + .clear = " .. ", + }, { +#endif .mask = _PAGE_SOFT, - .val = _PAGE_SOFT, - .set = "RSW", - .clear = " ", + .set = "RSW(%d)", + .clear = " .. 
", }, { .mask = _PAGE_DIRTY, - .val = _PAGE_DIRTY, .set = "D", .clear = ".", }, { .mask = _PAGE_ACCESSED, - .val = _PAGE_ACCESSED, .set = "A", .clear = ".", }, { .mask = _PAGE_GLOBAL, - .val = _PAGE_GLOBAL, .set = "G", .clear = ".", }, { .mask = _PAGE_USER, - .val = _PAGE_USER, .set = "U", .clear = ".", }, { .mask = _PAGE_EXEC, - .val = _PAGE_EXEC, .set = "X", .clear = ".", }, { .mask = _PAGE_WRITE, - .val = _PAGE_WRITE, .set = "W", .clear = ".", }, { .mask = _PAGE_READ, - .val = _PAGE_READ, .set = "R", .clear = ".", }, { .mask = _PAGE_PRESENT, - .val = _PAGE_PRESENT, .set = "V", .clear = ".", } @@ -208,15 +208,30 @@ static void dump_prot(struct pg_state *st) unsigned int i; for (i = 0; i < ARRAY_SIZE(pte_bits); i++) { - const char *s; + char s[7]; + unsigned long val; - if ((st->current_prot & pte_bits[i].mask) == pte_bits[i].val) - s = pte_bits[i].set; - else - s = pte_bits[i].clear; + val = st->current_prot & pte_bits[i].mask; + if (val) { + if (pte_bits[i].mask == _PAGE_SOFT) + sprintf(s, pte_bits[i].set, val >> 8); +#ifdef CONFIG_64BIT + else if (pte_bits[i].mask == _PAGE_MTMASK_SVPBMT) { + if (val == _PAGE_NOCACHE_SVPBMT) + sprintf(s, pte_bits[i].set, "NC"); + else if (val == _PAGE_IO_SVPBMT) + sprintf(s, pte_bits[i].set, "IO"); + else + sprintf(s, pte_bits[i].set, "??"); + } +#endif + else + sprintf(s, "%s", pte_bits[i].set); + } else { + sprintf(s, "%s", pte_bits[i].clear); + } - if (s) - pt_dump_seq_printf(st->seq, " %s", s); + pt_dump_seq_printf(st->seq, " %s", s); } } @@ -384,6 +399,9 @@ static int __init ptdump_init(void) kernel_ptd_info.base_addr = KERN_VIRT_START; + pg_level[1].name = pgtable_l5_enabled ? "P4D" : "PGD"; + pg_level[2].name = pgtable_l4_enabled ? "PUD" : "PGD"; + for (i = 0; i < ARRAY_SIZE(pg_level); i++) for (j = 0; j < ARRAY_SIZE(pte_bits); j++) pg_level[i].mask |= pte_bits[j].mask; diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 77be59aadc73..e6659d7368b3 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -3,33 +3,56 @@ #include <linux/mm.h> #include <linux/smp.h> #include <linux/sched.h> +#include <linux/hugetlb.h> #include <asm/sbi.h> #include <asm/mmu_context.h> static inline void local_flush_tlb_all_asid(unsigned long asid) { - __asm__ __volatile__ ("sfence.vma x0, %0" - : - : "r" (asid) - : "memory"); + if (asid != FLUSH_TLB_NO_ASID) + __asm__ __volatile__ ("sfence.vma x0, %0" + : + : "r" (asid) + : "memory"); + else + local_flush_tlb_all(); } static inline void local_flush_tlb_page_asid(unsigned long addr, unsigned long asid) { - __asm__ __volatile__ ("sfence.vma %0, %1" - : - : "r" (addr), "r" (asid) - : "memory"); + if (asid != FLUSH_TLB_NO_ASID) + __asm__ __volatile__ ("sfence.vma %0, %1" + : + : "r" (addr), "r" (asid) + : "memory"); + else + local_flush_tlb_page(addr); } -static inline void local_flush_tlb_range(unsigned long start, - unsigned long size, unsigned long stride) +/* + * Flush entire TLB if number of entries to be flushed is greater + * than the threshold below. 
+ */ +static unsigned long tlb_flush_all_threshold __read_mostly = 64; + +static void local_flush_tlb_range_threshold_asid(unsigned long start, + unsigned long size, + unsigned long stride, + unsigned long asid) { - if (size <= stride) - local_flush_tlb_page(start); - else - local_flush_tlb_all(); + unsigned long nr_ptes_in_range = DIV_ROUND_UP(size, stride); + int i; + + if (nr_ptes_in_range > tlb_flush_all_threshold) { + local_flush_tlb_all_asid(asid); + return; + } + + for (i = 0; i < nr_ptes_in_range; ++i) { + local_flush_tlb_page_asid(start, asid); + start += stride; + } } static inline void local_flush_tlb_range_asid(unsigned long start, @@ -37,8 +60,10 @@ static inline void local_flush_tlb_range_asid(unsigned long start, { if (size <= stride) local_flush_tlb_page_asid(start, asid); - else + else if (size == FLUSH_TLB_MAX_SIZE) local_flush_tlb_all_asid(asid); + else + local_flush_tlb_range_threshold_asid(start, size, stride, asid); } static void __ipi_flush_tlb_all(void *info) @@ -51,7 +76,7 @@ void flush_tlb_all(void) if (riscv_use_ipi_for_rfence()) on_each_cpu(__ipi_flush_tlb_all, NULL, 1); else - sbi_remote_sfence_vma(NULL, 0, -1); + sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID); } struct flush_tlb_range_data { @@ -68,68 +93,62 @@ static void __ipi_flush_tlb_range_asid(void *info) local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid); } -static void __ipi_flush_tlb_range(void *info) -{ - struct flush_tlb_range_data *d = info; - - local_flush_tlb_range(d->start, d->size, d->stride); -} - static void __flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long size, unsigned long stride) { struct flush_tlb_range_data ftd; - struct cpumask *cmask = mm_cpumask(mm); - unsigned int cpuid; + const struct cpumask *cmask; + unsigned long asid = FLUSH_TLB_NO_ASID; bool broadcast; - if (cpumask_empty(cmask)) - return; + if (mm) { + unsigned int cpuid; - cpuid = get_cpu(); - /* check if the tlbflush needs to be sent to other CPUs */ - broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; - if (static_branch_unlikely(&use_asid_allocator)) { - unsigned long asid = atomic_long_read(&mm->context.id) & asid_mask; - - if (broadcast) { - if (riscv_use_ipi_for_rfence()) { - ftd.asid = asid; - ftd.start = start; - ftd.size = size; - ftd.stride = stride; - on_each_cpu_mask(cmask, - __ipi_flush_tlb_range_asid, - &ftd, 1); - } else - sbi_remote_sfence_vma_asid(cmask, - start, size, asid); - } else { - local_flush_tlb_range_asid(start, size, stride, asid); - } + cmask = mm_cpumask(mm); + if (cpumask_empty(cmask)) + return; + + cpuid = get_cpu(); + /* check if the tlbflush needs to be sent to other CPUs */ + broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; + + if (static_branch_unlikely(&use_asid_allocator)) + asid = atomic_long_read(&mm->context.id) & asid_mask; } else { - if (broadcast) { - if (riscv_use_ipi_for_rfence()) { - ftd.asid = 0; - ftd.start = start; - ftd.size = size; - ftd.stride = stride; - on_each_cpu_mask(cmask, - __ipi_flush_tlb_range, - &ftd, 1); - } else - sbi_remote_sfence_vma(cmask, start, size); - } else { - local_flush_tlb_range(start, size, stride); - } + cmask = cpu_online_mask; + broadcast = true; } - put_cpu(); + if (broadcast) { + if (riscv_use_ipi_for_rfence()) { + ftd.asid = asid; + ftd.start = start; + ftd.size = size; + ftd.stride = stride; + on_each_cpu_mask(cmask, + __ipi_flush_tlb_range_asid, + &ftd, 1); + } else + sbi_remote_sfence_vma_asid(cmask, + start, size, asid); + } else { + 
local_flush_tlb_range_asid(start, size, stride, asid); + } + + if (mm) + put_cpu(); } void flush_tlb_mm(struct mm_struct *mm) { - __flush_tlb_range(mm, 0, -1, PAGE_SIZE); + __flush_tlb_range(mm, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); +} + +void flush_tlb_mm_range(struct mm_struct *mm, + unsigned long start, unsigned long end, + unsigned int page_size) +{ + __flush_tlb_range(mm, start, end - start, page_size); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) @@ -140,8 +159,40 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __flush_tlb_range(vma->vm_mm, start, end - start, PAGE_SIZE); + unsigned long stride_size; + + if (!is_vm_hugetlb_page(vma)) { + stride_size = PAGE_SIZE; + } else { + stride_size = huge_page_size(hstate_vma(vma)); + + /* + * As stated in the privileged specification, every PTE in a + * NAPOT region must be invalidated, so reset the stride in that + * case. + */ + if (has_svnapot()) { + if (stride_size >= PGDIR_SIZE) + stride_size = PGDIR_SIZE; + else if (stride_size >= P4D_SIZE) + stride_size = P4D_SIZE; + else if (stride_size >= PUD_SIZE) + stride_size = PUD_SIZE; + else if (stride_size >= PMD_SIZE) + stride_size = PMD_SIZE; + else + stride_size = PAGE_SIZE; + } + } + + __flush_tlb_range(vma->vm_mm, start, end - start, stride_size); +} + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + __flush_tlb_range(NULL, start, end - start, PAGE_SIZE); } + #ifdef CONFIG_TRANSPARENT_HUGEPAGE void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile index 9e6476719abb..280b0eb352b8 100644 --- a/arch/riscv/purgatory/Makefile +++ b/arch/riscv/purgatory/Makefile @@ -81,6 +81,14 @@ ifdef CONFIG_CFI_CLANG PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_CFI) endif +ifdef CONFIG_RELOCATABLE +PURGATORY_CFLAGS_REMOVE += -fPIE +endif + +ifdef CONFIG_SHADOW_CALL_STACK +PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_SCS) +endif + CFLAGS_REMOVE_purgatory.o += $(PURGATORY_CFLAGS_REMOVE) CFLAGS_purgatory.o += $(PURGATORY_CFLAGS) diff --git a/arch/riscv/purgatory/entry.S b/arch/riscv/purgatory/entry.S index 0194f4554130..5bcf3af903da 100644 --- a/arch/riscv/purgatory/entry.S +++ b/arch/riscv/purgatory/entry.S @@ -7,15 +7,11 @@ * Author: Li Zhengyu (lizhengyu3@huawei.com) * */ - -.macro size, sym:req - .size \sym, . 
- \sym -.endm +#include <linux/linkage.h> .text -.globl purgatory_start -purgatory_start: +SYM_CODE_START(purgatory_start) lla sp, .Lstack mv s0, a0 /* The hartid of the current hart */ @@ -28,8 +24,7 @@ purgatory_start: mv a1, s1 ld a2, riscv_kernel_entry jr a2 - -size purgatory_start +SYM_CODE_END(purgatory_start) .align 4 .rept 256 @@ -39,9 +34,6 @@ size purgatory_start .data -.globl riscv_kernel_entry -riscv_kernel_entry: - .quad 0 -size riscv_kernel_entry +SYM_DATA(riscv_kernel_entry, .quad 0) .end diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index b0d67ac8695f..3bec98d20283 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -236,6 +236,7 @@ config S390 select THREAD_INFO_IN_TASK select TRACE_IRQFLAGS_SUPPORT select TTY + select USER_STACKTRACE_SUPPORT select VIRT_CPU_ACCOUNTING select ZONE_DMA # Note: keep the above list sorted alphabetically diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 7b7521762633..2ab4872fbee1 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -3,6 +3,7 @@ #include <linux/init.h> #include <linux/ctype.h> #include <linux/pgtable.h> +#include <asm/page-states.h> #include <asm/ebcdic.h> #include <asm/sclp.h> #include <asm/sections.h> @@ -24,6 +25,7 @@ unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL; struct ipl_parameter_block __bootdata_preserved(ipl_block); int __bootdata_preserved(ipl_block_valid); int __bootdata_preserved(__kaslr_enabled); +int __bootdata_preserved(cmma_flag) = 1; unsigned long vmalloc_size = VMALLOC_DEFAULT_SIZE; unsigned long memory_limit; @@ -295,6 +297,12 @@ void parse_boot_command_line(void) if (!strcmp(param, "nokaslr")) __kaslr_enabled = 0; + if (!strcmp(param, "cmma")) { + rc = kstrtobool(val, &enabled); + if (!rc && !enabled) + cmma_flag = 0; + } + #if IS_ENABLED(CONFIG_KVM) if (!strcmp(param, "prot_virt")) { rc = kstrtobool(val, &enabled); diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 8826c4f18645..8104e0e3d188 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/string.h> #include <linux/elf.h> +#include <asm/page-states.h> #include <asm/boot_data.h> #include <asm/sections.h> #include <asm/maccess.h> @@ -57,6 +58,48 @@ static void detect_facilities(void) machine.has_nx = 1; } +static int cmma_test_essa(void) +{ + unsigned long reg1, reg2, tmp = 0; + int rc = 1; + psw_t old; + + /* Test ESSA_GET_STATE */ + asm volatile( + " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" + " epsw %[reg1],%[reg2]\n" + " st %[reg1],0(%[psw_pgm])\n" + " st %[reg2],4(%[psw_pgm])\n" + " larl %[reg1],1f\n" + " stg %[reg1],8(%[psw_pgm])\n" + " .insn rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n" + " la %[rc],0\n" + "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" + : [reg1] "=&d" (reg1), + [reg2] "=&a" (reg2), + [rc] "+&d" (rc), + [tmp] "=&d" (tmp), + "+Q" (S390_lowcore.program_new_psw), + "=Q" (old) + : [psw_old] "a" (&old), + [psw_pgm] "a" (&S390_lowcore.program_new_psw), + [cmd] "i" (ESSA_GET_STATE) + : "cc", "memory"); + return rc; +} + +static void cmma_init(void) +{ + if (!cmma_flag) + return; + if (cmma_test_essa()) { + cmma_flag = 0; + return; + } + if (test_facility(147)) + cmma_flag = 2; +} + static void setup_lpp(void) { S390_lowcore.current_pid = 0; @@ -306,6 +349,7 @@ void startup_kernel(void) setup_boot_command_line(); parse_boot_command_line(); detect_facilities(); + cmma_init(); sanitize_prot_virt_host(); max_physmem_end = detect_max_physmem_end(); 
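cmma_init() above leaves cmma_flag with three possible values, which later code (see the ESSA helpers in page-states.h below) uses to pick between the plain and NO-DAT stable page states. A sketch of the decision table; the enum and helper names are invented for illustration:

enum cmma_mode {
        CMMA_OFF        = 0,    /* cmma=off on the command line, or the
                                 * ESSA probe took a program check */
        CMMA_DAT        = 1,    /* ESSA works, NO-DAT facility absent */
        CMMA_NODAT      = 2,    /* facility 147: NO-DAT states usable */
};

static int cmma_usable(int flag)    { return flag != CMMA_OFF; }
static int cmma_has_nodat(int flag) { return flag >= CMMA_NODAT; }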
setup_ident_map_size(max_physmem_end); diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 3075d65e112c..e3a4500a5a75 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -2,6 +2,7 @@ #include <linux/sched/task.h> #include <linux/pgtable.h> #include <linux/kasan.h> +#include <asm/page-states.h> #include <asm/pgalloc.h> #include <asm/facility.h> #include <asm/sections.h> @@ -70,6 +71,10 @@ static void kasan_populate_shadow(void) crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z)); crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z)); memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE); + __arch_set_page_dat(kasan_early_shadow_p4d, 1UL << CRST_ALLOC_ORDER); + __arch_set_page_dat(kasan_early_shadow_pud, 1UL << CRST_ALLOC_ORDER); + __arch_set_page_dat(kasan_early_shadow_pmd, 1UL << CRST_ALLOC_ORDER); + __arch_set_page_dat(kasan_early_shadow_pte, 1); /* * Current memory layout: @@ -223,6 +228,7 @@ static void *boot_crst_alloc(unsigned long val) table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size); crst_table_init(table, val); + __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER); return table; } @@ -238,6 +244,7 @@ static pte_t *boot_pte_alloc(void) if (!pte_leftover) { pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE); pte = pte_leftover + _PAGE_TABLE_SIZE; + __arch_set_page_dat(pte, 1); } else { pte = pte_leftover; pte_leftover = NULL; @@ -418,6 +425,14 @@ void setup_vmem(unsigned long asce_limit) unsigned long asce_bits; int i; + /* + * Mark whole memory as no-dat. This must be done before any + * page tables are allocated, or kernel image builtin pages + * are marked as dat tables. + */ + for_each_physmem_online_range(i, &start, &end) + __arch_set_page_nodat((void *)start, (end - start) >> PAGE_SHIFT); + if (asce_limit == _REGION1_SIZE) { asce_type = _REGION2_ENTRY_EMPTY; asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; @@ -429,6 +444,8 @@ void setup_vmem(unsigned long asce_limit) crst_table_init((unsigned long *)swapper_pg_dir, asce_type); crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY); + __arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER); + __arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER); /* * To allow prefixing the lowcore must be mapped with 4KB pages. 
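The vmem.c hunks above make the ordering in setup_vmem() explicit: all online memory is first put into the stable no-dat state, and only afterwards are the frames that actually back DAT tables (swapper_pg_dir, invalid_pg_dir, the crst and pte allocations) promoted back to stable. Reversing the order would declare frames already holding live DAT tables as unused for address translation. A minimal sketch of the pattern follows, assuming the __arch_set_page_nodat()/__arch_set_page_dat() helpers from asm/page-states.h; the range array and function name are invented for illustration:

    #include <asm/page-states.h>  /* __arch_set_page_nodat(), __arch_set_page_dat() */
    #include <asm/pgalloc.h>      /* CRST_ALLOC_ORDER */

    /* Hypothetical stand-in for the decompressor's physmem range iterator. */
    struct phys_range {
            unsigned long start;  /* physical start address, page aligned */
            unsigned long end;    /* physical end address, page aligned */
    };

    static void sketch_cmma_boot_setup(struct phys_range *ranges, int nr,
                                       unsigned long *crst_table)
    {
            int i;

            /* Step 1: default every online frame to stable no-dat ... */
            for (i = 0; i < nr; i++)
                    __arch_set_page_nodat((void *)ranges[i].start,
                                          (ranges[i].end - ranges[i].start) >> PAGE_SHIFT);

            /* Step 2: ... then mark the frames that hold a region/segment table. */
            __arch_set_page_dat(crst_table, 1UL << CRST_ALLOC_ORDER);
    }

Both helpers degrade to no-ops when cmma_flag is zero, so the sketch is safe to call whether or not the machine supports ESSA.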
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 21b9e5290c04..01f1682a73b7 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -73,8 +73,6 @@ struct kprobe_ctlblk { void arch_remove_kprobe(struct kprobe *p); int kprobe_fault_handler(struct pt_regs *regs, int trapnr); -int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); #define flush_insn_slot(p) do { } while (0) diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 829d68e2c685..bb1b4bef1878 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -11,7 +11,6 @@ typedef struct { cpumask_t cpu_attach_mask; atomic_t flush_count; unsigned int flush_mm; - struct list_head pgtable_list; struct list_head gmap_list; unsigned long gmap_asce; unsigned long asce; @@ -39,7 +38,6 @@ typedef struct { #define INIT_MM_CONTEXT(name) \ .context.lock = __SPIN_LOCK_UNLOCKED(name.context.lock), \ - .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \ .context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list), #endif diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 757fe6f0d802..929af18b0908 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -22,7 +22,6 @@ static inline int init_new_context(struct task_struct *tsk, unsigned long asce_type, init_entry; spin_lock_init(&mm->context.lock); - INIT_LIST_HEAD(&mm->context.pgtable_list); INIT_LIST_HEAD(&mm->context.gmap_list); cpumask_clear(&mm->context.cpu_attach_mask); atomic_set(&mm->context.flush_count, 0); diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h index c33c4deb545f..08fcbd628120 100644 --- a/arch/s390/include/asm/page-states.h +++ b/arch/s390/include/asm/page-states.h @@ -7,6 +7,9 @@ #ifndef PAGE_STATES_H #define PAGE_STATES_H +#include <asm/sections.h> +#include <asm/page.h> + #define ESSA_GET_STATE 0 #define ESSA_SET_STABLE 1 #define ESSA_SET_UNUSED 2 @@ -18,4 +21,60 @@ #define ESSA_MAX ESSA_SET_STABLE_NODAT +extern int __bootdata_preserved(cmma_flag); + +static __always_inline unsigned long essa(unsigned long paddr, unsigned char cmd) +{ + unsigned long rc; + + asm volatile( + " .insn rrf,0xb9ab0000,%[rc],%[paddr],%[cmd],0" + : [rc] "=d" (rc) + : [paddr] "d" (paddr), + [cmd] "i" (cmd)); + return rc; +} + +static __always_inline void __set_page_state(void *addr, unsigned long num_pages, unsigned char cmd) +{ + unsigned long paddr = __pa(addr) & PAGE_MASK; + + while (num_pages--) { + essa(paddr, cmd); + paddr += PAGE_SIZE; + } +} + +static inline void __set_page_unused(void *addr, unsigned long num_pages) +{ + __set_page_state(addr, num_pages, ESSA_SET_UNUSED); +} + +static inline void __set_page_stable_dat(void *addr, unsigned long num_pages) +{ + __set_page_state(addr, num_pages, ESSA_SET_STABLE); +} + +static inline void __set_page_stable_nodat(void *addr, unsigned long num_pages) +{ + __set_page_state(addr, num_pages, ESSA_SET_STABLE_NODAT); +} + +static inline void __arch_set_page_nodat(void *addr, unsigned long num_pages) +{ + if (!cmma_flag) + return; + if (cmma_flag < 2) + __set_page_stable_dat(addr, num_pages); + else + __set_page_stable_nodat(addr, num_pages); +} + +static inline void __arch_set_page_dat(void *addr, unsigned long num_pages) +{ + if (!cmma_flag) + return; + __set_page_stable_dat(addr, num_pages); +} + #endif diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 
cfec0743314e..73b9c3bf377f 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -164,7 +164,6 @@ static inline int page_reset_referenced(unsigned long addr) struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); -void arch_set_page_dat(struct page *page, int order); static inline int devmem_is_allowed(unsigned long pfn) { diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index b248694e0024..e91cd6bbc330 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -159,13 +159,6 @@ struct zpci_dev { unsigned long *dma_table; int tlb_refresh; - spinlock_t iommu_bitmap_lock; - unsigned long *iommu_bitmap; - unsigned long *lazy_bitmap; - unsigned long iommu_size; - unsigned long iommu_pages; - unsigned int next_bit; - struct iommu_device iommu_dev; /* IOMMU core handle */ char res_name[16]; @@ -180,10 +173,6 @@ struct zpci_dev { struct zpci_fmb *fmb; u16 fmb_update; /* update interval */ u16 fmb_length; - /* software counters */ - atomic64_t allocated_pages; - atomic64_t mapped_pages; - atomic64_t unmapped_pages; u8 version; enum pci_bus_speed max_bus_speed; diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h index d6189ed14f84..f0c677ddd270 100644 --- a/arch/s390/include/asm/pci_clp.h +++ b/arch/s390/include/asm/pci_clp.h @@ -50,6 +50,9 @@ struct clp_fh_list_entry { #define CLP_UTIL_STR_LEN 64 #define CLP_PFIP_NR_SEGMENTS 4 +/* PCI function type numbers */ +#define PCI_FUNC_TYPE_ISM 0x5 /* ISM device */ + extern bool zpci_unique_uid; struct clp_rsp_slpc_pci { diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h index 7119c04c51c5..42d7cc4262ca 100644 --- a/arch/s390/include/asm/pci_dma.h +++ b/arch/s390/include/asm/pci_dma.h @@ -82,117 +82,16 @@ enum zpci_ioat_dtype { #define ZPCI_TABLE_VALID_MASK 0x20 #define ZPCI_TABLE_PROT_MASK 0x200 -static inline unsigned int calc_rtx(dma_addr_t ptr) -{ - return ((unsigned long) ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK; -} - -static inline unsigned int calc_sx(dma_addr_t ptr) -{ - return ((unsigned long) ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK; -} - -static inline unsigned int calc_px(dma_addr_t ptr) -{ - return ((unsigned long) ptr >> PAGE_SHIFT) & ZPCI_PT_MASK; -} - -static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa) -{ - *entry &= ZPCI_PTE_FLAG_MASK; - *entry |= (pfaa & ZPCI_PTE_ADDR_MASK); -} - -static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto) -{ - *entry &= ZPCI_RTE_FLAG_MASK; - *entry |= (sto & ZPCI_RTE_ADDR_MASK); - *entry |= ZPCI_TABLE_TYPE_RTX; -} - -static inline void set_st_pto(unsigned long *entry, phys_addr_t pto) -{ - *entry &= ZPCI_STE_FLAG_MASK; - *entry |= (pto & ZPCI_STE_ADDR_MASK); - *entry |= ZPCI_TABLE_TYPE_SX; -} - -static inline void validate_rt_entry(unsigned long *entry) -{ - *entry &= ~ZPCI_TABLE_VALID_MASK; - *entry &= ~ZPCI_TABLE_OFFSET_MASK; - *entry |= ZPCI_TABLE_VALID; - *entry |= ZPCI_TABLE_LEN_RTX; -} - -static inline void validate_st_entry(unsigned long *entry) -{ - *entry &= ~ZPCI_TABLE_VALID_MASK; - *entry |= ZPCI_TABLE_VALID; -} - -static inline void invalidate_pt_entry(unsigned long *entry) -{ - WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID); - *entry &= ~ZPCI_PTE_VALID_MASK; - *entry |= ZPCI_PTE_INVALID; -} - -static inline void validate_pt_entry(unsigned long *entry) -{ - WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID); - *entry &= ~ZPCI_PTE_VALID_MASK; - *entry |= 
ZPCI_PTE_VALID; -} - -static inline void entry_set_protected(unsigned long *entry) -{ - *entry &= ~ZPCI_TABLE_PROT_MASK; - *entry |= ZPCI_TABLE_PROTECTED; -} - -static inline void entry_clr_protected(unsigned long *entry) -{ - *entry &= ~ZPCI_TABLE_PROT_MASK; - *entry |= ZPCI_TABLE_UNPROTECTED; -} - -static inline int reg_entry_isvalid(unsigned long entry) -{ - return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID; -} - -static inline int pt_entry_isvalid(unsigned long entry) -{ - return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID; -} - -static inline unsigned long *get_rt_sto(unsigned long entry) -{ - if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX) - return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK); - else - return NULL; - -} - -static inline unsigned long *get_st_pto(unsigned long entry) -{ - if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX) - return phys_to_virt(entry & ZPCI_STE_ADDR_MASK); - else - return NULL; -} - -/* Prototypes */ -void dma_free_seg_table(unsigned long); -unsigned long *dma_alloc_cpu_table(gfp_t gfp); -void dma_cleanup_tables(unsigned long *); -unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, - gfp_t gfp); -void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags); - -extern const struct dma_map_ops s390_pci_dma_ops; +struct zpci_iommu_ctrs { + atomic64_t mapped_pages; + atomic64_t unmapped_pages; + atomic64_t global_rpcits; + atomic64_t sync_map_rpcits; + atomic64_t sync_rpcits; +}; + +struct zpci_dev; +struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev); #endif diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 376b4b23bdaa..502d655fe6ae 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -25,7 +25,6 @@ void crst_table_free(struct mm_struct *, unsigned long *); unsigned long *page_table_alloc(struct mm_struct *); struct page *page_table_alloc_pgste(struct mm_struct *mm); void page_table_free(struct mm_struct *, unsigned long *); -void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long); void page_table_free_pgste(struct page *page); extern int page_table_allocate_pgste; diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 25cadc2b9cff..df316436d2e1 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -125,9 +125,6 @@ static inline void vmcp_cma_reserve(void) { } void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault); -void cmma_init(void); -void cmma_init_nodat(void); - extern void (*_machine_restart)(char *command); extern void (*_machine_halt)(void); extern void (*_machine_power_off)(void); diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index 78f7b729b65f..31ec4f545e03 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -6,6 +6,13 @@ #include <linux/ptrace.h> #include <asm/switch_to.h> +struct stack_frame_user { + unsigned long back_chain; + unsigned long empty1[5]; + unsigned long gprs[10]; + unsigned long empty2[4]; +}; + enum stack_type { STACK_TYPE_UNKNOWN, STACK_TYPE_TASK, diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 383b1f91442c..d1455a601adc 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -69,12 +69,9 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; tlb->cleared_pmds = 1; - /* - * 
page_table_free_rcu takes care of the allocation bit masks - * of the 2K table fragments in the 4K page table page, - * then calls tlb_remove_table. - */ - page_table_free_rcu(tlb, (unsigned long *) pte, address); + if (mm_alloc_pgste(tlb->mm)) + gmap_unlink(tlb->mm, (unsigned long *)pte, address); + tlb_remove_ptdesc(tlb, pte); } /* @@ -112,7 +109,7 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, __tlb_adjust_range(tlb, address, PAGE_SIZE); tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; - tlb_remove_table(tlb, p4d); + tlb_remove_ptdesc(tlb, p4d); } /* @@ -130,7 +127,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; tlb->cleared_p4ds = 1; - tlb_remove_table(tlb, pud); + tlb_remove_ptdesc(tlb, pud); } diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index ff1f02b54771..eb43e5922a25 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -46,6 +46,7 @@ decompressor_handled_param(vmalloc); decompressor_handled_param(dfltcc); decompressor_handled_param(facilities); decompressor_handled_param(nokaslr); +decompressor_handled_param(cmma); #if IS_ENABLED(CONFIG_KVM) decompressor_handled_param(prot_virt); #endif diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index c27321cb0969..dfa77da2fd2e 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -15,7 +15,10 @@ #include <linux/export.h> #include <linux/seq_file.h> #include <linux/spinlock.h> +#include <linux/uaccess.h> +#include <linux/compat.h> #include <linux/sysfs.h> +#include <asm/stacktrace.h> #include <asm/irq.h> #include <asm/cpu_mf.h> #include <asm/lowcore.h> @@ -212,6 +215,44 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, } } +void perf_callchain_user(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + struct stack_frame_user __user *sf; + unsigned long ip, sp; + bool first = true; + + if (is_compat_task()) + return; + perf_callchain_store(entry, instruction_pointer(regs)); + sf = (void __user *)user_stack_pointer(regs); + pagefault_disable(); + while (entry->nr < entry->max_stack) { + if (__get_user(sp, &sf->back_chain)) + break; + if (__get_user(ip, &sf->gprs[8])) + break; + if (ip & 0x1) { + /* + * If the instruction address is invalid, and this + * is the first stack frame, assume r14 has not + * been written to the stack yet. Otherwise exit. + */ + if (first && !(regs->gprs[14] & 0x1)) + ip = regs->gprs[14]; + else + break; + } + perf_callchain_store(entry, ip); + /* Sanity check: ABI requires SP to be aligned 8 bytes. 
*/ + if (!sp || sp & 0x7) + break; + sf = (void __user *)sp; + first = false; + } + pagefault_enable(); +} + /* Perf definitions for PMU event attributes in sysfs */ ssize_t cpumf_events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 0787010139f7..94f440e38303 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -6,9 +6,12 @@ */ #include <linux/stacktrace.h> +#include <linux/uaccess.h> +#include <linux/compat.h> #include <asm/stacktrace.h> #include <asm/unwind.h> #include <asm/kprobes.h> +#include <asm/ptrace.h> void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs) @@ -58,3 +61,43 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, return -EINVAL; return 0; } + +void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, + const struct pt_regs *regs) +{ + struct stack_frame_user __user *sf; + unsigned long ip, sp; + bool first = true; + + if (is_compat_task()) + return; + if (!consume_entry(cookie, instruction_pointer(regs))) + return; + sf = (void __user *)user_stack_pointer(regs); + pagefault_disable(); + while (1) { + if (__get_user(sp, &sf->back_chain)) + break; + if (__get_user(ip, &sf->gprs[8])) + break; + if (ip & 0x1) { + /* + * If the instruction address is invalid, and this + * is the first stack frame, assume r14 has not + * been written to the stack yet. Otherwise exit. + */ + if (first && !(regs->gprs[14] & 0x1)) + ip = regs->gprs[14]; + else + break; + } + if (!consume_entry(cookie, ip)) + break; + /* Sanity check: ABI requires SP to be aligned 8 bytes. */ + if (!sp || sp & 0x7) + break; + sf = (void __user *)sp; + first = false; + } + pagefault_enable(); +} diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 20786f6883b2..6f96b5a71c63 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -18,7 +18,7 @@ #include <linux/ksm.h> #include <linux/mman.h> #include <linux/pgtable.h> - +#include <asm/page-states.h> #include <asm/pgalloc.h> #include <asm/gmap.h> #include <asm/page.h> @@ -33,7 +33,7 @@ static struct page *gmap_alloc_crst(void) page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); if (!page) return NULL; - arch_set_page_dat(page, CRST_ALLOC_ORDER); + __arch_set_page_dat(page_to_virt(page), 1UL << CRST_ALLOC_ORDER); return page; } diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 7eca10c32caa..43e612bc2bcd 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -164,14 +164,10 @@ void __init mem_init(void) pv_init(); kfence_split_mapping(); - /* Setup guest page hinting */ - cmma_init(); /* this will put all low memory onto the freelists */ memblock_free_all(); setup_zero_pages(); /* Setup zeroed pages. 
*/ - - cmma_init_nodat(); } void free_initmem(void) diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index a31acb2c4ef2..01f9b39e65f5 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -7,212 +7,18 @@ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/types.h> #include <linux/mm.h> -#include <linux/memblock.h> -#include <linux/gfp.h> -#include <linux/init.h> -#include <asm/asm-extable.h> -#include <asm/facility.h> #include <asm/page-states.h> +#include <asm/sections.h> +#include <asm/page.h> -static int cmma_flag = 1; - -static int __init cmma(char *str) -{ - bool enabled; - - if (!kstrtobool(str, &enabled)) - cmma_flag = enabled; - return 1; -} -__setup("cmma=", cmma); - -static inline int cmma_test_essa(void) -{ - unsigned long tmp = 0; - int rc = -EOPNOTSUPP; - - /* test ESSA_GET_STATE */ - asm volatile( - " .insn rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n" - "0: la %[rc],0\n" - "1:\n" - EX_TABLE(0b,1b) - : [rc] "+&d" (rc), [tmp] "+&d" (tmp) - : [cmd] "i" (ESSA_GET_STATE)); - return rc; -} - -void __init cmma_init(void) -{ - if (!cmma_flag) - return; - if (cmma_test_essa()) { - cmma_flag = 0; - return; - } - if (test_facility(147)) - cmma_flag = 2; -} - -static inline void set_page_unused(struct page *page, int order) -{ - int i, rc; - - for (i = 0; i < (1 << order); i++) - asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" - : "=&d" (rc) - : "a" (page_to_phys(page + i)), - "i" (ESSA_SET_UNUSED)); -} - -static inline void set_page_stable_dat(struct page *page, int order) -{ - int i, rc; - - for (i = 0; i < (1 << order); i++) - asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" - : "=&d" (rc) - : "a" (page_to_phys(page + i)), - "i" (ESSA_SET_STABLE)); -} - -static inline void set_page_stable_nodat(struct page *page, int order) -{ - int i, rc; - - for (i = 0; i < (1 << order); i++) - asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" - : "=&d" (rc) - : "a" (page_to_phys(page + i)), - "i" (ESSA_SET_STABLE_NODAT)); -} - -static void mark_kernel_pmd(pud_t *pud, unsigned long addr, unsigned long end) -{ - unsigned long next; - struct page *page; - pmd_t *pmd; - - pmd = pmd_offset(pud, addr); - do { - next = pmd_addr_end(addr, end); - if (pmd_none(*pmd) || pmd_large(*pmd)) - continue; - page = phys_to_page(pmd_val(*pmd)); - set_bit(PG_arch_1, &page->flags); - } while (pmd++, addr = next, addr != end); -} - -static void mark_kernel_pud(p4d_t *p4d, unsigned long addr, unsigned long end) -{ - unsigned long next; - struct page *page; - pud_t *pud; - int i; - - pud = pud_offset(p4d, addr); - do { - next = pud_addr_end(addr, end); - if (pud_none(*pud) || pud_large(*pud)) - continue; - if (!pud_folded(*pud)) { - page = phys_to_page(pud_val(*pud)); - for (i = 0; i < 4; i++) - set_bit(PG_arch_1, &page[i].flags); - } - mark_kernel_pmd(pud, addr, next); - } while (pud++, addr = next, addr != end); -} - -static void mark_kernel_p4d(pgd_t *pgd, unsigned long addr, unsigned long end) -{ - unsigned long next; - struct page *page; - p4d_t *p4d; - int i; - - p4d = p4d_offset(pgd, addr); - do { - next = p4d_addr_end(addr, end); - if (p4d_none(*p4d)) - continue; - if (!p4d_folded(*p4d)) { - page = phys_to_page(p4d_val(*p4d)); - for (i = 0; i < 4; i++) - set_bit(PG_arch_1, &page[i].flags); - } - mark_kernel_pud(p4d, addr, next); - } while (p4d++, addr = next, addr != end); -} - -static void mark_kernel_pgd(void) -{ - unsigned long addr, next, max_addr; - struct page *page; - pgd_t *pgd; - int i; - - addr = 
0; - /* - * Figure out maximum virtual address accessible with the - * kernel ASCE. This is required to keep the page table walker - * from accessing non-existent entries. - */ - max_addr = (S390_lowcore.kernel_asce.val & _ASCE_TYPE_MASK) >> 2; - max_addr = 1UL << (max_addr * 11 + 31); - pgd = pgd_offset_k(addr); - do { - next = pgd_addr_end(addr, max_addr); - if (pgd_none(*pgd)) - continue; - if (!pgd_folded(*pgd)) { - page = phys_to_page(pgd_val(*pgd)); - for (i = 0; i < 4; i++) - set_bit(PG_arch_1, &page[i].flags); - } - mark_kernel_p4d(pgd, addr, next); - } while (pgd++, addr = next, addr != max_addr); -} - -void __init cmma_init_nodat(void) -{ - struct page *page; - unsigned long start, end, ix; - int i; - - if (cmma_flag < 2) - return; - /* Mark pages used in kernel page tables */ - mark_kernel_pgd(); - page = virt_to_page(&swapper_pg_dir); - for (i = 0; i < 4; i++) - set_bit(PG_arch_1, &page[i].flags); - page = virt_to_page(&invalid_pg_dir); - for (i = 0; i < 4; i++) - set_bit(PG_arch_1, &page[i].flags); - - /* Set all kernel pages not used for page tables to stable/no-dat */ - for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) { - page = pfn_to_page(start); - for (ix = start; ix < end; ix++, page++) { - if (__test_and_clear_bit(PG_arch_1, &page->flags)) - continue; /* skip page table pages */ - if (!list_empty(&page->lru)) - continue; /* skip free pages */ - set_page_stable_nodat(page, 0); - } - } -} +int __bootdata_preserved(cmma_flag); void arch_free_page(struct page *page, int order) { if (!cmma_flag) return; - set_page_unused(page, order); + __set_page_unused(page_to_virt(page), 1UL << order); } void arch_alloc_page(struct page *page, int order) @@ -220,14 +26,7 @@ void arch_alloc_page(struct page *page, int order) if (!cmma_flag) return; if (cmma_flag < 2) - set_page_stable_dat(page, order); + __set_page_stable_dat(page_to_virt(page), 1UL << order); else - set_page_stable_nodat(page, order); -} - -void arch_set_page_dat(struct page *page, int order) -{ - if (!cmma_flag) - return; - set_page_stable_dat(page, order); + __set_page_stable_nodat(page_to_virt(page), 1UL << order); } diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 5488ae17318e..008e487c94a6 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -10,6 +10,7 @@ #include <linux/slab.h> #include <linux/mm.h> #include <asm/mmu_context.h> +#include <asm/page-states.h> #include <asm/pgalloc.h> #include <asm/gmap.h> #include <asm/tlb.h> @@ -43,11 +44,13 @@ __initcall(page_table_register_sysctl); unsigned long *crst_table_alloc(struct mm_struct *mm) { struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER); + unsigned long *table; if (!ptdesc) return NULL; - arch_set_page_dat(ptdesc_page(ptdesc), CRST_ALLOC_ORDER); - return (unsigned long *) ptdesc_to_virt(ptdesc); + table = ptdesc_to_virt(ptdesc); + __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER); + return table; } void crst_table_free(struct mm_struct *mm, unsigned long *table) @@ -130,11 +133,6 @@ err_p4d: return -ENOMEM; } -static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) -{ - return atomic_fetch_xor(bits, v) ^ bits; -} - #ifdef CONFIG_PGSTE struct page *page_table_alloc_pgste(struct mm_struct *mm) @@ -145,7 +143,7 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm) ptdesc = pagetable_alloc(GFP_KERNEL, 0); if (ptdesc) { table = (u64 *)ptdesc_to_virt(ptdesc); - arch_set_page_dat(virt_to_page(table), 0); + __arch_set_page_dat(table, 1); memset64(table, _PAGE_INVALID, PTRS_PER_PTE); 
memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE); } @@ -159,125 +157,11 @@ void page_table_free_pgste(struct page *page) #endif /* CONFIG_PGSTE */ -/* - * A 2KB-pgtable is either upper or lower half of a normal page. - * The second half of the page may be unused or used as another - * 2KB-pgtable. - * - * Whenever possible the parent page for a new 2KB-pgtable is picked - * from the list of partially allocated pages mm_context_t::pgtable_list. - * In case the list is empty a new parent page is allocated and added to - * the list. - * - * When a parent page gets fully allocated it contains 2KB-pgtables in both - * upper and lower halves and is removed from mm_context_t::pgtable_list. - * - * When 2KB-pgtable is freed from to fully allocated parent page that - * page turns partially allocated and added to mm_context_t::pgtable_list. - * - * If 2KB-pgtable is freed from the partially allocated parent page that - * page turns unused and gets removed from mm_context_t::pgtable_list. - * Furthermore, the unused parent page is released. - * - * As follows from the above, no unallocated or fully allocated parent - * pages are contained in mm_context_t::pgtable_list. - * - * The upper byte (bits 24-31) of the parent page _refcount is used - * for tracking contained 2KB-pgtables and has the following format: - * - * PP AA - * 01234567 upper byte (bits 24-31) of struct page::_refcount - * || || - * || |+--- upper 2KB-pgtable is allocated - * || +---- lower 2KB-pgtable is allocated - * |+------- upper 2KB-pgtable is pending for removal - * +-------- lower 2KB-pgtable is pending for removal - * - * (See commit 620b4e903179 ("s390: use _refcount for pgtables") on why - * using _refcount is possible). - * - * When 2KB-pgtable is allocated the corresponding AA bit is set to 1. - * The parent page is either: - * - added to mm_context_t::pgtable_list in case the second half of the - * parent page is still unallocated; - * - removed from mm_context_t::pgtable_list in case both hales of the - * parent page are allocated; - * These operations are protected with mm_context_t::lock. - * - * When 2KB-pgtable is deallocated the corresponding AA bit is set to 0 - * and the corresponding PP bit is set to 1 in a single atomic operation. - * Thus, PP and AA bits corresponding to the same 2KB-pgtable are mutually - * exclusive and may never be both set to 1! - * The parent page is either: - * - added to mm_context_t::pgtable_list in case the second half of the - * parent page is still allocated; - * - removed from mm_context_t::pgtable_list in case the second half of - * the parent page is unallocated; - * These operations are protected with mm_context_t::lock. - * - * It is important to understand that mm_context_t::lock only protects - * mm_context_t::pgtable_list and AA bits, but not the parent page itself - * and PP bits. - * - * Releasing the parent page happens whenever the PP bit turns from 1 to 0, - * while both AA bits and the second PP bit are already unset. Then the - * parent page does not contain any 2KB-pgtable fragment anymore, and it has - * also been removed from mm_context_t::pgtable_list. It is safe to release - * the page therefore. - * - * PGSTE memory spaces use full 4KB-pgtables and do not need most of the - * logic described above. Both AA bits are set to 1 to denote a 4KB-pgtable - * while the PP bits are never used, nor such a page is added to or removed - * from mm_context_t::pgtable_list. 
- * - * pte_free_defer() overrides those rules: it takes the page off pgtable_list, - * and prevents both 2K fragments from being reused. pte_free_defer() has to - * guarantee that its pgtable cannot be reused before the RCU grace period - * has elapsed (which page_table_free_rcu() does not actually guarantee). - * But for simplicity, because page->rcu_head overlays page->lru, and because - * the RCU callback might not be called before the mm_context_t has been freed, - * pte_free_defer() in this implementation prevents both fragments from being - * reused, and delays making the call to RCU until both fragments are freed. - */ unsigned long *page_table_alloc(struct mm_struct *mm) { - unsigned long *table; struct ptdesc *ptdesc; - unsigned int mask, bit; - - /* Try to get a fragment of a 4K page as a 2K page table */ - if (!mm_alloc_pgste(mm)) { - table = NULL; - spin_lock_bh(&mm->context.lock); - if (!list_empty(&mm->context.pgtable_list)) { - ptdesc = list_first_entry(&mm->context.pgtable_list, - struct ptdesc, pt_list); - mask = atomic_read(&ptdesc->_refcount) >> 24; - /* - * The pending removal bits must also be checked. - * Failure to do so might lead to an impossible - * value of (i.e 0x13 or 0x23) written to _refcount. - * Such values violate the assumption that pending and - * allocation bits are mutually exclusive, and the rest - * of the code unrails as result. That could lead to - * a whole bunch of races and corruptions. - */ - mask = (mask | (mask >> 4)) & 0x03U; - if (mask != 0x03U) { - table = (unsigned long *) ptdesc_to_virt(ptdesc); - bit = mask & 1; /* =1 -> second 2K */ - if (bit) - table += PTRS_PER_PTE; - atomic_xor_bits(&ptdesc->_refcount, - 0x01U << (bit + 24)); - list_del_init(&ptdesc->pt_list); - } - } - spin_unlock_bh(&mm->context.lock); - if (table) - return table; - } - /* Allocate a fresh page */ + unsigned long *table; + ptdesc = pagetable_alloc(GFP_KERNEL, 0); if (!ptdesc) return NULL; @@ -285,177 +169,57 @@ unsigned long *page_table_alloc(struct mm_struct *mm) pagetable_free(ptdesc); return NULL; } - arch_set_page_dat(ptdesc_page(ptdesc), 0); - /* Initialize page table */ - table = (unsigned long *) ptdesc_to_virt(ptdesc); - if (mm_alloc_pgste(mm)) { - /* Return 4K page table with PGSTEs */ - INIT_LIST_HEAD(&ptdesc->pt_list); - atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24); - memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE); - memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE); - } else { - /* Return the first 2K fragment of the page */ - atomic_xor_bits(&ptdesc->_refcount, 0x01U << 24); - memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE); - spin_lock_bh(&mm->context.lock); - list_add(&ptdesc->pt_list, &mm->context.pgtable_list); - spin_unlock_bh(&mm->context.lock); - } + table = ptdesc_to_virt(ptdesc); + __arch_set_page_dat(table, 1); + /* pt_list is used by gmap only */ + INIT_LIST_HEAD(&ptdesc->pt_list); + memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE); + memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE); return table; } -static void page_table_release_check(struct page *page, void *table, - unsigned int half, unsigned int mask) +static void pagetable_pte_dtor_free(struct ptdesc *ptdesc) { - char msg[128]; - - if (!IS_ENABLED(CONFIG_DEBUG_VM)) - return; - if (!mask && list_empty(&page->lru)) - return; - snprintf(msg, sizeof(msg), - "Invalid pgtable %p release half 0x%02x mask 0x%02x", - table, half, mask); - dump_page(page, msg); -} - -static void pte_free_now(struct rcu_head *head) -{ - struct ptdesc *ptdesc; - - ptdesc = 
container_of(head, struct ptdesc, pt_rcu_head); pagetable_pte_dtor(ptdesc); pagetable_free(ptdesc); } void page_table_free(struct mm_struct *mm, unsigned long *table) { - unsigned int mask, bit, half; struct ptdesc *ptdesc = virt_to_ptdesc(table); - if (!mm_alloc_pgste(mm)) { - /* Free 2K page table fragment of a 4K page */ - bit = ((unsigned long) table & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); - spin_lock_bh(&mm->context.lock); - /* - * Mark the page for delayed release. The actual release - * will happen outside of the critical section from this - * function or from __tlb_remove_table() - */ - mask = atomic_xor_bits(&ptdesc->_refcount, 0x11U << (bit + 24)); - mask >>= 24; - if ((mask & 0x03U) && !folio_test_active(ptdesc_folio(ptdesc))) { - /* - * Other half is allocated, and neither half has had - * its free deferred: add page to head of list, to make - * this freed half available for immediate reuse. - */ - list_add(&ptdesc->pt_list, &mm->context.pgtable_list); - } else { - /* If page is on list, now remove it. */ - list_del_init(&ptdesc->pt_list); - } - spin_unlock_bh(&mm->context.lock); - mask = atomic_xor_bits(&ptdesc->_refcount, 0x10U << (bit + 24)); - mask >>= 24; - if (mask != 0x00U) - return; - half = 0x01U << bit; - } else { - half = 0x03U; - mask = atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24); - mask >>= 24; - } - - page_table_release_check(ptdesc_page(ptdesc), table, half, mask); - if (folio_test_clear_active(ptdesc_folio(ptdesc))) - call_rcu(&ptdesc->pt_rcu_head, pte_free_now); - else - pte_free_now(&ptdesc->pt_rcu_head); + pagetable_pte_dtor_free(ptdesc); } -void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, - unsigned long vmaddr) +void __tlb_remove_table(void *table) { - struct mm_struct *mm; - unsigned int bit, mask; struct ptdesc *ptdesc = virt_to_ptdesc(table); + struct page *page = ptdesc_page(ptdesc); - mm = tlb->mm; - if (mm_alloc_pgste(mm)) { - gmap_unlink(mm, table, vmaddr); - table = (unsigned long *) ((unsigned long)table | 0x03U); - tlb_remove_ptdesc(tlb, table); + if (compound_order(page) == CRST_ALLOC_ORDER) { + /* pmd, pud, or p4d */ + pagetable_free(ptdesc); return; } - bit = ((unsigned long) table & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); - spin_lock_bh(&mm->context.lock); - /* - * Mark the page for delayed release. The actual release will happen - * outside of the critical section from __tlb_remove_table() or from - * page_table_free() - */ - mask = atomic_xor_bits(&ptdesc->_refcount, 0x11U << (bit + 24)); - mask >>= 24; - if ((mask & 0x03U) && !folio_test_active(ptdesc_folio(ptdesc))) { - /* - * Other half is allocated, and neither half has had - * its free deferred: add page to end of list, to make - * this freed half available for reuse once its pending - * bit has been cleared by __tlb_remove_table(). - */ - list_add_tail(&ptdesc->pt_list, &mm->context.pgtable_list); - } else { - /* If page is on list, now remove it. 
*/ - list_del_init(&ptdesc->pt_list); - } - spin_unlock_bh(&mm->context.lock); - table = (unsigned long *) ((unsigned long) table | (0x01U << bit)); - tlb_remove_table(tlb, table); + pagetable_pte_dtor_free(ptdesc); } -void __tlb_remove_table(void *_table) +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static void pte_free_now(struct rcu_head *head) { - unsigned int mask = (unsigned long) _table & 0x03U, half = mask; - void *table = (void *)((unsigned long) _table ^ mask); - struct ptdesc *ptdesc = virt_to_ptdesc(table); + struct ptdesc *ptdesc = container_of(head, struct ptdesc, pt_rcu_head); - switch (half) { - case 0x00U: /* pmd, pud, or p4d */ - pagetable_free(ptdesc); - return; - case 0x01U: /* lower 2K of a 4K page table */ - case 0x02U: /* higher 2K of a 4K page table */ - mask = atomic_xor_bits(&ptdesc->_refcount, mask << (4 + 24)); - mask >>= 24; - if (mask != 0x00U) - return; - break; - case 0x03U: /* 4K page table with pgstes */ - mask = atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24); - mask >>= 24; - break; - } - - page_table_release_check(ptdesc_page(ptdesc), table, half, mask); - if (folio_test_clear_active(ptdesc_folio(ptdesc))) - call_rcu(&ptdesc->pt_rcu_head, pte_free_now); - else - pte_free_now(&ptdesc->pt_rcu_head); + pagetable_pte_dtor_free(ptdesc); } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable) { - struct page *page; + struct ptdesc *ptdesc = virt_to_ptdesc(pgtable); - page = virt_to_page(pgtable); - SetPageActive(page); - page_table_free(mm, (unsigned long *)pgtable); + call_rcu(&ptdesc->pt_rcu_head, pte_free_now); /* - * page_table_free() does not do the pgste gmap_unlink() which - * page_table_free_rcu() does: warn us if pgste ever reaches here. + * THPs are not allowed for KVM guests. Warn if pgste ever reaches here. + * Turn to the generic pte_free_defer() version once gmap is removed. */ WARN_ON_ONCE(mm_has_pgste(mm)); } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 2e8a1064f103..186a020857cf 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -50,8 +50,7 @@ void *vmem_crst_alloc(unsigned long val) if (!table) return NULL; crst_table_init(table, val); - if (slab_is_available()) - arch_set_page_dat(virt_to_page(table), CRST_ALLOC_ORDER); + __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER); return table; } @@ -67,6 +66,7 @@ pte_t __ref *vmem_pte_alloc(void) if (!pte) return NULL; memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); + __arch_set_page_dat(pte, 1); return pte; } diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index 5ae31ca9dd44..0547a10406e7 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -3,7 +3,7 @@ # Makefile for the s390 PCI subsystem. 
# -obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \ +obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o pci_sysfs.o \ pci_event.o pci_debug.o pci_insn.o pci_mmio.o \ pci_bus.o pci_kvm_hook.o obj-$(CONFIG_PCI_IOV) += pci_iov.o diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 6fab5c085565..676ac74026a8 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -124,7 +124,11 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas, WARN_ON_ONCE(iota & 0x3fff); fib.pba = base; - fib.pal = limit; + /* Work around off by one in ISM virt device */ + if (zdev->pft == PCI_FUNC_TYPE_ISM && limit > base) + fib.pal = limit + (1 << 12); + else + fib.pal = limit; fib.iota = iota | ZPCI_IOTA_RTTO_FLAG; fib.gd = zdev->gisa; cc = zpci_mod_fc(req, &fib, status); @@ -153,6 +157,7 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas) int zpci_fmb_enable_device(struct zpci_dev *zdev) { u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE); + struct zpci_iommu_ctrs *ctrs; struct zpci_fib fib = {0}; u8 cc, status; @@ -165,9 +170,15 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev) WARN_ON((u64) zdev->fmb & 0xf); /* reset software counters */ - atomic64_set(&zdev->allocated_pages, 0); - atomic64_set(&zdev->mapped_pages, 0); - atomic64_set(&zdev->unmapped_pages, 0); + ctrs = zpci_get_iommu_ctrs(zdev); + if (ctrs) { + atomic64_set(&ctrs->mapped_pages, 0); + atomic64_set(&ctrs->unmapped_pages, 0); + atomic64_set(&ctrs->global_rpcits, 0); + atomic64_set(&ctrs->sync_map_rpcits, 0); + atomic64_set(&ctrs->sync_rpcits, 0); + } + fib.fmb_addr = virt_to_phys(zdev->fmb); fib.gd = zdev->gisa; @@ -582,7 +593,6 @@ int pcibios_device_add(struct pci_dev *pdev) pdev->no_vf_scan = 1; pdev->dev.groups = zpci_attr_groups; - pdev->dev.dma_ops = &s390_pci_dma_ops; zpci_map_resources(pdev); for (i = 0; i < PCI_STD_NUM_BARS; i++) { @@ -756,8 +766,6 @@ int zpci_hot_reset_device(struct zpci_dev *zdev) if (zdev->dma_table) rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, virt_to_phys(zdev->dma_table), &status); - else - rc = zpci_dma_init_device(zdev); if (rc) { zpci_disable_device(zdev); return rc; @@ -865,11 +873,6 @@ int zpci_deconfigure_device(struct zpci_dev *zdev) if (zdev->zbus->bus) zpci_bus_remove_device(zdev, false); - if (zdev->dma_table) { - rc = zpci_dma_exit_device(zdev); - if (rc) - return rc; - } if (zdev_enabled(zdev)) { rc = zpci_disable_device(zdev); if (rc) @@ -918,8 +921,6 @@ void zpci_release_device(struct kref *kref) if (zdev->zbus->bus) zpci_bus_remove_device(zdev, false); - if (zdev->dma_table) - zpci_dma_exit_device(zdev); if (zdev_enabled(zdev)) zpci_disable_device(zdev); @@ -1109,10 +1110,6 @@ static int __init pci_base_init(void) if (rc) goto out_irq; - rc = zpci_dma_init(); - if (rc) - goto out_dma; - rc = clp_scan_pci_devices(); if (rc) goto out_find; @@ -1122,8 +1119,6 @@ static int __init pci_base_init(void) return 0; out_find: - zpci_dma_exit(); -out_dma: zpci_irq_exit(); out_irq: zpci_mem_exit(); diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 32245b970a0c..daa5d7450c7d 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -47,11 +47,6 @@ static int zpci_bus_prepare_device(struct zpci_dev *zdev) rc = zpci_enable_device(zdev); if (rc) return rc; - rc = zpci_dma_init_device(zdev); - if (rc) { - zpci_disable_device(zdev); - return rc; - } } if (!zdev->has_resources) { diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index ca6bd98eec13..6dde2263c79d 100644 --- 
a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -53,9 +53,11 @@ static char *pci_fmt3_names[] = { }; static char *pci_sw_names[] = { - "Allocated pages", "Mapped pages", "Unmapped pages", + "Global RPCITs", + "Sync Map RPCITs", + "Sync RPCITs", }; static void pci_fmb_show(struct seq_file *m, char *name[], int length, @@ -69,10 +71,14 @@ static void pci_fmb_show(struct seq_file *m, char *name[], int length, static void pci_sw_counter_show(struct seq_file *m) { - struct zpci_dev *zdev = m->private; - atomic64_t *counter = &zdev->allocated_pages; + struct zpci_iommu_ctrs *ctrs = zpci_get_iommu_ctrs(m->private); + atomic64_t *counter; int i; + if (!ctrs) + return; + + counter = &ctrs->mapped_pages; for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++) seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i], atomic64_read(counter)); diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c deleted file mode 100644 index 99209085c75b..000000000000 --- a/arch/s390/pci/pci_dma.c +++ /dev/null @@ -1,746 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright IBM Corp. 2012 - * - * Author(s): - * Jan Glauber <jang@linux.vnet.ibm.com> - */ - -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/export.h> -#include <linux/iommu-helper.h> -#include <linux/dma-map-ops.h> -#include <linux/vmalloc.h> -#include <linux/pci.h> -#include <asm/pci_dma.h> - -static struct kmem_cache *dma_region_table_cache; -static struct kmem_cache *dma_page_table_cache; -static int s390_iommu_strict; -static u64 s390_iommu_aperture; -static u32 s390_iommu_aperture_factor = 1; - -static int zpci_refresh_global(struct zpci_dev *zdev) -{ - return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma, - zdev->iommu_pages * PAGE_SIZE); -} - -unsigned long *dma_alloc_cpu_table(gfp_t gfp) -{ - unsigned long *table, *entry; - - table = kmem_cache_alloc(dma_region_table_cache, gfp); - if (!table) - return NULL; - - for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++) - *entry = ZPCI_TABLE_INVALID; - return table; -} - -static void dma_free_cpu_table(void *table) -{ - kmem_cache_free(dma_region_table_cache, table); -} - -static unsigned long *dma_alloc_page_table(gfp_t gfp) -{ - unsigned long *table, *entry; - - table = kmem_cache_alloc(dma_page_table_cache, gfp); - if (!table) - return NULL; - - for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++) - *entry = ZPCI_PTE_INVALID; - return table; -} - -static void dma_free_page_table(void *table) -{ - kmem_cache_free(dma_page_table_cache, table); -} - -static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp) -{ - unsigned long old_rte, rte; - unsigned long *sto; - - rte = READ_ONCE(*rtep); - if (reg_entry_isvalid(rte)) { - sto = get_rt_sto(rte); - } else { - sto = dma_alloc_cpu_table(gfp); - if (!sto) - return NULL; - - set_rt_sto(&rte, virt_to_phys(sto)); - validate_rt_entry(&rte); - entry_clr_protected(&rte); - - old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte); - if (old_rte != ZPCI_TABLE_INVALID) { - /* Somone else was faster, use theirs */ - dma_free_cpu_table(sto); - sto = get_rt_sto(old_rte); - } - } - return sto; -} - -static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp) -{ - unsigned long old_ste, ste; - unsigned long *pto; - - ste = READ_ONCE(*step); - if (reg_entry_isvalid(ste)) { - pto = get_st_pto(ste); - } else { - pto = dma_alloc_page_table(gfp); - if (!pto) - return NULL; - set_st_pto(&ste, virt_to_phys(pto)); - validate_st_entry(&ste); - entry_clr_protected(&ste); 
- - old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste); - if (old_ste != ZPCI_TABLE_INVALID) { - /* Somone else was faster, use theirs */ - dma_free_page_table(pto); - pto = get_st_pto(old_ste); - } - } - return pto; -} - -unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, - gfp_t gfp) -{ - unsigned long *sto, *pto; - unsigned int rtx, sx, px; - - rtx = calc_rtx(dma_addr); - sto = dma_get_seg_table_origin(&rto[rtx], gfp); - if (!sto) - return NULL; - - sx = calc_sx(dma_addr); - pto = dma_get_page_table_origin(&sto[sx], gfp); - if (!pto) - return NULL; - - px = calc_px(dma_addr); - return &pto[px]; -} - -void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags) -{ - unsigned long pte; - - pte = READ_ONCE(*ptep); - if (flags & ZPCI_PTE_INVALID) { - invalidate_pt_entry(&pte); - } else { - set_pt_pfaa(&pte, page_addr); - validate_pt_entry(&pte); - } - - if (flags & ZPCI_TABLE_PROTECTED) - entry_set_protected(&pte); - else - entry_clr_protected(&pte); - - xchg(ptep, pte); -} - -static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa, - dma_addr_t dma_addr, size_t size, int flags) -{ - unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; - phys_addr_t page_addr = (pa & PAGE_MASK); - unsigned long *entry; - int i, rc = 0; - - if (!nr_pages) - return -EINVAL; - - if (!zdev->dma_table) - return -EINVAL; - - for (i = 0; i < nr_pages; i++) { - entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr, - GFP_ATOMIC); - if (!entry) { - rc = -ENOMEM; - goto undo_cpu_trans; - } - dma_update_cpu_trans(entry, page_addr, flags); - page_addr += PAGE_SIZE; - dma_addr += PAGE_SIZE; - } - -undo_cpu_trans: - if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) { - flags = ZPCI_PTE_INVALID; - while (i-- > 0) { - page_addr -= PAGE_SIZE; - dma_addr -= PAGE_SIZE; - entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr, - GFP_ATOMIC); - if (!entry) - break; - dma_update_cpu_trans(entry, page_addr, flags); - } - } - return rc; -} - -static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr, - size_t size, int flags) -{ - unsigned long irqflags; - int ret; - - /* - * With zdev->tlb_refresh == 0, rpcit is not required to establish new - * translations when previously invalid translation-table entries are - * validated. With lazy unmap, rpcit is skipped for previously valid - * entries, but a global rpcit is then required before any address can - * be re-used, i.e. after each iommu bitmap wrap-around. 
- */ - if ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID) { - if (!zdev->tlb_refresh) - return 0; - } else { - if (!s390_iommu_strict) - return 0; - } - - ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr, - PAGE_ALIGN(size)); - if (ret == -ENOMEM && !s390_iommu_strict) { - /* enable the hypervisor to free some resources */ - if (zpci_refresh_global(zdev)) - goto out; - - spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags); - bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap, - zdev->lazy_bitmap, zdev->iommu_pages); - bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages); - spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags); - ret = 0; - } -out: - return ret; -} - -static int dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa, - dma_addr_t dma_addr, size_t size, int flags) -{ - int rc; - - rc = __dma_update_trans(zdev, pa, dma_addr, size, flags); - if (rc) - return rc; - - rc = __dma_purge_tlb(zdev, dma_addr, size, flags); - if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) - __dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID); - - return rc; -} - -void dma_free_seg_table(unsigned long entry) -{ - unsigned long *sto = get_rt_sto(entry); - int sx; - - for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++) - if (reg_entry_isvalid(sto[sx])) - dma_free_page_table(get_st_pto(sto[sx])); - - dma_free_cpu_table(sto); -} - -void dma_cleanup_tables(unsigned long *table) -{ - int rtx; - - if (!table) - return; - - for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++) - if (reg_entry_isvalid(table[rtx])) - dma_free_seg_table(table[rtx]); - - dma_free_cpu_table(table); -} - -static unsigned long __dma_alloc_iommu(struct device *dev, - unsigned long start, int size) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - - return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, - start, size, zdev->start_dma >> PAGE_SHIFT, - dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT), - 0); -} - -static dma_addr_t dma_alloc_address(struct device *dev, int size) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - unsigned long offset, flags; - - spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); - offset = __dma_alloc_iommu(dev, zdev->next_bit, size); - if (offset == -1) { - if (!s390_iommu_strict) { - /* global flush before DMA addresses are reused */ - if (zpci_refresh_global(zdev)) - goto out_error; - - bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap, - zdev->lazy_bitmap, zdev->iommu_pages); - bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages); - } - /* wrap-around */ - offset = __dma_alloc_iommu(dev, 0, size); - if (offset == -1) - goto out_error; - } - zdev->next_bit = offset + size; - spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); - - return zdev->start_dma + offset * PAGE_SIZE; - -out_error: - spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); - return DMA_MAPPING_ERROR; -} - -static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - unsigned long flags, offset; - - offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT; - - spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); - if (!zdev->iommu_bitmap) - goto out; - - if (s390_iommu_strict) - bitmap_clear(zdev->iommu_bitmap, offset, size); - else - bitmap_set(zdev->lazy_bitmap, offset, size); - -out: - spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); -} - -static inline void zpci_err_dma(unsigned long rc, unsigned long addr) -{ - struct { - unsigned long rc; - unsigned long addr; - } __packed data = {rc, addr}; - - 
zpci_err_hex(&data, sizeof(data)); -} - -static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction, - unsigned long attrs) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - unsigned long pa = page_to_phys(page) + offset; - int flags = ZPCI_PTE_VALID; - unsigned long nr_pages; - dma_addr_t dma_addr; - int ret; - - /* This rounds up number of pages based on size and offset */ - nr_pages = iommu_num_pages(pa, size, PAGE_SIZE); - dma_addr = dma_alloc_address(dev, nr_pages); - if (dma_addr == DMA_MAPPING_ERROR) { - ret = -ENOSPC; - goto out_err; - } - - /* Use rounded up size */ - size = nr_pages * PAGE_SIZE; - - if (direction == DMA_NONE || direction == DMA_TO_DEVICE) - flags |= ZPCI_TABLE_PROTECTED; - - ret = dma_update_trans(zdev, pa, dma_addr, size, flags); - if (ret) - goto out_free; - - atomic64_add(nr_pages, &zdev->mapped_pages); - return dma_addr + (offset & ~PAGE_MASK); - -out_free: - dma_free_address(dev, dma_addr, nr_pages); -out_err: - zpci_err("map error:\n"); - zpci_err_dma(ret, pa); - return DMA_MAPPING_ERROR; -} - -static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, - size_t size, enum dma_data_direction direction, - unsigned long attrs) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - int npages, ret; - - npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); - dma_addr = dma_addr & PAGE_MASK; - ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE, - ZPCI_PTE_INVALID); - if (ret) { - zpci_err("unmap error:\n"); - zpci_err_dma(ret, dma_addr); - return; - } - - atomic64_add(npages, &zdev->unmapped_pages); - dma_free_address(dev, dma_addr, npages); -} - -static void *s390_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, - unsigned long attrs) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - struct page *page; - phys_addr_t pa; - dma_addr_t map; - - size = PAGE_ALIGN(size); - page = alloc_pages(flag | __GFP_ZERO, get_order(size)); - if (!page) - return NULL; - - pa = page_to_phys(page); - map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0); - if (dma_mapping_error(dev, map)) { - __free_pages(page, get_order(size)); - return NULL; - } - - atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages); - if (dma_handle) - *dma_handle = map; - return phys_to_virt(pa); -} - -static void s390_dma_free(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - unsigned long attrs) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - - size = PAGE_ALIGN(size); - atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages); - s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0); - free_pages((unsigned long)vaddr, get_order(size)); -} - -/* Map a segment into a contiguous dma address area */ -static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg, - size_t size, dma_addr_t *handle, - enum dma_data_direction dir) -{ - unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - dma_addr_t dma_addr_base, dma_addr; - int flags = ZPCI_PTE_VALID; - struct scatterlist *s; - phys_addr_t pa = 0; - int ret; - - dma_addr_base = dma_alloc_address(dev, nr_pages); - if (dma_addr_base == DMA_MAPPING_ERROR) - return -ENOMEM; - - dma_addr = dma_addr_base; - if (dir == DMA_NONE || dir == DMA_TO_DEVICE) - flags |= ZPCI_TABLE_PROTECTED; - - for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) { - pa = page_to_phys(sg_page(s)); - ret 
= __dma_update_trans(zdev, pa, dma_addr, - s->offset + s->length, flags); - if (ret) - goto unmap; - - dma_addr += s->offset + s->length; - } - ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags); - if (ret) - goto unmap; - - *handle = dma_addr_base; - atomic64_add(nr_pages, &zdev->mapped_pages); - - return ret; - -unmap: - dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base, - ZPCI_PTE_INVALID); - dma_free_address(dev, dma_addr_base, nr_pages); - zpci_err("map error:\n"); - zpci_err_dma(ret, pa); - return ret; -} - -static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nr_elements, enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *s = sg, *start = sg, *dma = sg; - unsigned int max = dma_get_max_seg_size(dev); - unsigned int size = s->offset + s->length; - unsigned int offset = s->offset; - int count = 0, i, ret; - - for (i = 1; i < nr_elements; i++) { - s = sg_next(s); - - s->dma_length = 0; - - if (s->offset || (size & ~PAGE_MASK) || - size + s->length > max) { - ret = __s390_dma_map_sg(dev, start, size, - &dma->dma_address, dir); - if (ret) - goto unmap; - - dma->dma_address += offset; - dma->dma_length = size - offset; - - size = offset = s->offset; - start = s; - dma = sg_next(dma); - count++; - } - size += s->length; - } - ret = __s390_dma_map_sg(dev, start, size, &dma->dma_address, dir); - if (ret) - goto unmap; - - dma->dma_address += offset; - dma->dma_length = size - offset; - - return count + 1; -unmap: - for_each_sg(sg, s, count, i) - s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s), - dir, attrs); - - return ret; -} - -static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nr_elements, enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *s; - int i; - - for_each_sg(sg, s, nr_elements, i) { - if (s->dma_length) - s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, - dir, attrs); - s->dma_address = 0; - s->dma_length = 0; - } -} - -static unsigned long *bitmap_vzalloc(size_t bits, gfp_t flags) -{ - size_t n = BITS_TO_LONGS(bits); - size_t bytes; - - if (unlikely(check_mul_overflow(n, sizeof(unsigned long), &bytes))) - return NULL; - - return vzalloc(bytes); -} - -int zpci_dma_init_device(struct zpci_dev *zdev) -{ - u8 status; - int rc; - - /* - * At this point, if the device is part of an IOMMU domain, this would - * be a strong hint towards a bug in the IOMMU API (common) code and/or - * simultaneous access via IOMMU and DMA API. So let's issue a warning. - */ - WARN_ON(zdev->s390_domain); - - spin_lock_init(&zdev->iommu_bitmap_lock); - - zdev->dma_table = dma_alloc_cpu_table(GFP_KERNEL); - if (!zdev->dma_table) { - rc = -ENOMEM; - goto out; - } - - /* - * Restrict the iommu bitmap size to the minimum of the following: - * - s390_iommu_aperture which defaults to high_memory - * - 3-level pagetable address limit minus start_dma offset - * - DMA address range allowed by the hardware (clp query pci fn) - * - * Also set zdev->end_dma to the actual end address of the usable - * range, instead of the theoretical maximum as reported by hardware. - * - * This limits the number of concurrently usable DMA mappings since - * for each DMA mapped memory address we need a DMA address including - * extra DMA addresses for multiple mappings of the same memory address. 
- */ - zdev->start_dma = PAGE_ALIGN(zdev->start_dma); - zdev->iommu_size = min3(s390_iommu_aperture, - ZPCI_TABLE_SIZE_RT - zdev->start_dma, - zdev->end_dma - zdev->start_dma + 1); - zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1; - zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT; - zdev->iommu_bitmap = bitmap_vzalloc(zdev->iommu_pages, GFP_KERNEL); - if (!zdev->iommu_bitmap) { - rc = -ENOMEM; - goto free_dma_table; - } - if (!s390_iommu_strict) { - zdev->lazy_bitmap = bitmap_vzalloc(zdev->iommu_pages, GFP_KERNEL); - if (!zdev->lazy_bitmap) { - rc = -ENOMEM; - goto free_bitmap; - } - - } - if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table), &status)) { - rc = -EIO; - goto free_bitmap; - } - - return 0; -free_bitmap: - vfree(zdev->iommu_bitmap); - zdev->iommu_bitmap = NULL; - vfree(zdev->lazy_bitmap); - zdev->lazy_bitmap = NULL; -free_dma_table: - dma_free_cpu_table(zdev->dma_table); - zdev->dma_table = NULL; -out: - return rc; -} - -int zpci_dma_exit_device(struct zpci_dev *zdev) -{ - int cc = 0; - - /* - * At this point, if the device is part of an IOMMU domain, this would - * be a strong hint towards a bug in the IOMMU API (common) code and/or - * simultaneous access via IOMMU and DMA API. So let's issue a warning. - */ - WARN_ON(zdev->s390_domain); - if (zdev_enabled(zdev)) - cc = zpci_unregister_ioat(zdev, 0); - /* - * cc == 3 indicates the function is gone already. This can happen - * if the function was deconfigured/disabled suddenly and we have not - * received a new handle yet. - */ - if (cc && cc != 3) - return -EIO; - - dma_cleanup_tables(zdev->dma_table); - zdev->dma_table = NULL; - vfree(zdev->iommu_bitmap); - zdev->iommu_bitmap = NULL; - vfree(zdev->lazy_bitmap); - zdev->lazy_bitmap = NULL; - zdev->next_bit = 0; - return 0; -} - -static int __init dma_alloc_cpu_table_caches(void) -{ - dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables", - ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN, - 0, NULL); - if (!dma_region_table_cache) - return -ENOMEM; - - dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables", - ZPCI_PT_SIZE, ZPCI_PT_ALIGN, - 0, NULL); - if (!dma_page_table_cache) { - kmem_cache_destroy(dma_region_table_cache); - return -ENOMEM; - } - return 0; -} - -int __init zpci_dma_init(void) -{ - s390_iommu_aperture = (u64)virt_to_phys(high_memory); - if (!s390_iommu_aperture_factor) - s390_iommu_aperture = ULONG_MAX; - else - s390_iommu_aperture *= s390_iommu_aperture_factor; - - return dma_alloc_cpu_table_caches(); -} - -void zpci_dma_exit(void) -{ - kmem_cache_destroy(dma_page_table_cache); - kmem_cache_destroy(dma_region_table_cache); -} - -const struct dma_map_ops s390_pci_dma_ops = { - .alloc = s390_dma_alloc, - .free = s390_dma_free, - .map_sg = s390_dma_map_sg, - .unmap_sg = s390_dma_unmap_sg, - .map_page = s390_dma_map_pages, - .unmap_page = s390_dma_unmap_pages, - .mmap = dma_common_mmap, - .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, - .free_pages = dma_common_free_pages, - /* dma_supported is unconditionally true without a callback */ -}; -EXPORT_SYMBOL_GPL(s390_pci_dma_ops); - -static int __init s390_iommu_setup(char *str) -{ - if (!strcmp(str, "strict")) - s390_iommu_strict = 1; - return 1; -} - -__setup("s390_iommu=", s390_iommu_setup); - -static int __init s390_iommu_aperture_setup(char *str) -{ - if (kstrtou32(str, 10, &s390_iommu_aperture_factor)) - s390_iommu_aperture_factor = 1; - return 1; -} - -__setup("s390_iommu_aperture=", s390_iommu_aperture_setup); 
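The removal above deletes arch/s390/pci/pci_dma.c, the zPCI-specific DMA API backend whose work (IOVA allocation from a per-device bitmap, page-table updates via dma_update_trans(), strict vs. lazy IOTLB flushing) moves to the common IOMMU DMA layer. The heart of the deleted allocator was a next-fit search over zdev->iommu_bitmap starting at the zdev->next_bit hint. As a rough illustration only — not the kernel code: the page size, aperture size, and base address below are made-up constants, and locking and the lazy-flush bitmap are omitted — a standalone sketch of that bitmap IOVA allocator might look like this:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12			/* assume 4 KiB pages */
#define IOVA_PAGES 64			/* toy aperture of 64 pages */

static uint64_t iova_bitmap[(IOVA_PAGES + 63) / 64];
static const uint64_t iova_base = 0x80000000ULL; /* stand-in for zdev->start_dma */
static unsigned int next_bit;		/* next-fit search hint, like zdev->next_bit */

static int bit_is_set(unsigned int i) { return (iova_bitmap[i / 64] >> (i % 64)) & 1; }
static void bit_set(unsigned int i) { iova_bitmap[i / 64] |= 1ULL << (i % 64); }
static void bit_clear(unsigned int i) { iova_bitmap[i / 64] &= ~(1ULL << (i % 64)); }

/* Next-fit: scan from the hint first, then wrap around once. */
static uint64_t iova_alloc(unsigned int nr_pages)
{
	for (int pass = 0; pass < 2; pass++) {
		unsigned int i = pass ? 0 : next_bit;

		for (; i + nr_pages <= IOVA_PAGES; i++) {
			unsigned int j;

			/* stop at the first busy page inside the candidate range */
			for (j = 0; j < nr_pages && !bit_is_set(i + j); j++)
				;
			if (j < nr_pages)
				continue;	/* range busy, keep scanning */
			for (j = 0; j < nr_pages; j++)
				bit_set(i + j);
			next_bit = i + nr_pages;
			return iova_base + ((uint64_t)i << PAGE_SHIFT);
		}
	}
	return 0;	/* aperture exhausted; the real driver returned DMA_MAPPING_ERROR */
}

static void iova_free(uint64_t iova, unsigned int nr_pages)
{
	unsigned int i = (iova - iova_base) >> PAGE_SHIFT;

	while (nr_pages--)
		bit_clear(i++);
}

int main(void)
{
	uint64_t a = iova_alloc(3);	/* e.g. a three-page scatter-gather mapping */
	uint64_t b = iova_alloc(1);

	printf("a=%#llx b=%#llx\n", (unsigned long long)a, (unsigned long long)b);
	iova_free(a, 3);
	return 0;
}

One detail the sketch drops entirely: in non-strict mode the deleted driver deferred IOTLB flushing, parking freed ranges in zdev->lazy_bitmap and recycling them only after a flush once the allocator wrapped — the strict-vs-lazy trade-off this backend exposed.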
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index b9324ca2eb94..4d9773ef9e0a 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -59,9 +59,16 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res) } } -static bool is_passed_through(struct zpci_dev *zdev) +static bool is_passed_through(struct pci_dev *pdev) { - return zdev->s390_domain; + struct zpci_dev *zdev = to_zpci(pdev); + bool ret; + + mutex_lock(&zdev->kzdev_lock); + ret = !!zdev->kzdev; + mutex_unlock(&zdev->kzdev_lock); + + return ret; } static bool is_driver_supported(struct pci_driver *driver) @@ -176,7 +183,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) } pdev->error_state = pci_channel_io_frozen; - if (is_passed_through(to_zpci(pdev))) { + if (is_passed_through(pdev)) { pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n", pci_name(pdev)); goto out_unlock; @@ -239,7 +246,7 @@ static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es) * we will inject the error event and let the guest recover the device * itself. */ - if (is_passed_through(to_zpci(pdev))) + if (is_passed_through(pdev)) goto out; driver = to_pci_driver(pdev->dev.driver); if (driver && driver->err_handler && driver->err_handler->error_detected) @@ -306,8 +313,6 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh) /* Even though the device is already gone we still * need to free zPCI resources as part of the disable. */ - if (zdev->dma_table) - zpci_dma_exit_device(zdev); if (zdev_enabled(zdev)) zpci_disable_device(zdev); zdev->state = ZPCI_FN_STATE_STANDBY; diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index cae280e5c047..8a7abac51816 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -56,6 +56,7 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, struct pci_dev *pdev = to_pci_dev(dev); struct zpci_dev *zdev = to_zpci(pdev); int ret = 0; + u8 status; /* Can't use device_remove_self() here as that would lead us to lock * the pci_rescan_remove_lock while holding the device's kernfs lock.
@@ -82,12 +83,6 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, pci_lock_rescan_remove(); if (pci_dev_is_added(pdev)) { pci_stop_and_remove_bus_device(pdev); - if (zdev->dma_table) { - ret = zpci_dma_exit_device(zdev); - if (ret) - goto out; - } - if (zdev_enabled(zdev)) { ret = zpci_disable_device(zdev); /* @@ -105,14 +100,16 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, ret = zpci_enable_device(zdev); if (ret) goto out; - ret = zpci_dma_init_device(zdev); - if (ret) { - zpci_disable_device(zdev); - goto out; + + if (zdev->dma_table) { + ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, + virt_to_phys(zdev->dma_table), &status); + if (ret) + zpci_disable_device(zdev); } - pci_rescan_bus(zdev->zbus->bus); } out: + pci_rescan_bus(zdev->zbus->bus); pci_unlock_rescan_remove(); if (kn) sysfs_unbreak_active_protection(kn); diff --git a/arch/sh/include/asm/kprobes.h b/arch/sh/include/asm/kprobes.h index eeba83e0a7d2..65d4c3316a5b 100644 --- a/arch/sh/include/asm/kprobes.h +++ b/arch/sh/include/asm/kprobes.h @@ -46,8 +46,6 @@ struct kprobe_ctlblk { }; extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); -extern int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); extern int kprobe_handle_illslot(unsigned long pc); #else diff --git a/arch/sparc/include/asm/kprobes.h b/arch/sparc/include/asm/kprobes.h index 06c2bc767ef7..aec742cd898f 100644 --- a/arch/sparc/include/asm/kprobes.h +++ b/arch/sparc/include/asm/kprobes.h @@ -47,8 +47,6 @@ struct kprobe_ctlblk { struct prev_kprobe prev_kprobe; }; -int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); int kprobe_fault_handler(struct pt_regs *regs, int trapnr); asmlinkage void __kprobes kprobe_trap(unsigned long trap_level, struct pt_regs *regs); diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index a2e9317aad49..5939694dfb28 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -113,8 +113,6 @@ struct kprobe_ctlblk { }; extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); -extern int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); extern int kprobe_int3_handler(struct pt_regs *regs); #else diff --git a/block/blk-core.c b/block/blk-core.c index 9d51e9894ece..fdf25b8d6e78 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -501,8 +501,8 @@ static inline void bio_check_ro(struct bio *bio) if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) { if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) return; - pr_warn("Trying to write to read-only block-device %pg\n", - bio->bi_bdev); + pr_warn_ratelimited("Trying to write to read-only block-device %pg\n", + bio->bi_bdev); /* Older lvm-tools actually trigger this */ } } diff --git a/crypto/Kconfig b/crypto/Kconfig index bbf51d55724e..70661f58ee41 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1297,10 +1297,12 @@ config CRYPTO_JITTERENTROPY See https://www.chronox.de/jent.html +if CRYPTO_JITTERENTROPY +if CRYPTO_FIPS && EXPERT + choice prompt "CPU Jitter RNG Memory Size" default CRYPTO_JITTERENTROPY_MEMSIZE_2 - depends on CRYPTO_JITTERENTROPY help The Jitter RNG measures the execution time of memory accesses. Multiple consecutive memory accesses are performed. 
If the memory @@ -1344,7 +1346,6 @@ config CRYPTO_JITTERENTROPY_OSR int "CPU Jitter RNG Oversampling Rate" range 1 15 default 1 - depends on CRYPTO_JITTERENTROPY help The Jitter RNG allows the specification of an oversampling rate (OSR). The Jitter RNG operation requires a fixed amount of timing @@ -1359,7 +1360,6 @@ config CRYPTO_JITTERENTROPY_OSR config CRYPTO_JITTERENTROPY_TESTINTERFACE bool "CPU Jitter RNG Test Interface" - depends on CRYPTO_JITTERENTROPY help The test interface allows a privileged process to capture the raw unconditioned high resolution time stamp noise that @@ -1377,6 +1377,28 @@ config CRYPTO_JITTERENTROPY_TESTINTERFACE If unsure, select N. +endif # if CRYPTO_FIPS && EXPERT + +if !(CRYPTO_FIPS && EXPERT) + +config CRYPTO_JITTERENTROPY_MEMORY_BLOCKS + int + default 64 + +config CRYPTO_JITTERENTROPY_MEMORY_BLOCKSIZE + int + default 32 + +config CRYPTO_JITTERENTROPY_OSR + int + default 1 + +config CRYPTO_JITTERENTROPY_TESTINTERFACE + bool + +endif # if !(CRYPTO_FIPS && EXPERT) +endif # if CRYPTO_JITTERENTROPY + config CRYPTO_KDF800108_CTR tristate select CRYPTO_HMAC diff --git a/crypto/ahash.c b/crypto/ahash.c index deee55f939dc..80c3e5354711 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -651,6 +651,7 @@ struct crypto_ahash *crypto_clone_ahash(struct crypto_ahash *hash) err = PTR_ERR(shash); goto out_free_nhash; } + nhash->using_shash = true; *nctx = shash; return nhash; } diff --git a/drivers/Kconfig b/drivers/Kconfig index 9826907eb06e..7bdad836fc62 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -135,8 +135,6 @@ source "drivers/uio/Kconfig" source "drivers/vfio/Kconfig" -source "drivers/vlynq/Kconfig" - source "drivers/virt/Kconfig" source "drivers/virtio/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 722d15be0eb7..d828329c268d 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -151,7 +151,6 @@ obj-$(CONFIG_BCMA) += bcma/ obj-$(CONFIG_VHOST_RING) += vhost/ obj-$(CONFIG_VHOST_IOTLB) += vhost/ obj-$(CONFIG_VHOST) += vhost/ -obj-$(CONFIG_VLYNQ) += vlynq/ obj-$(CONFIG_GREYBUS) += greybus/ obj-$(CONFIG_COMEDI) += comedi/ obj-$(CONFIG_STAGING) += staging/ diff --git a/drivers/acpi/riscv/rhct.c b/drivers/acpi/riscv/rhct.c index b280b3e9c7d9..caa2c16e1697 100644 --- a/drivers/acpi/riscv/rhct.c +++ b/drivers/acpi/riscv/rhct.c @@ -8,8 +8,9 @@ #define pr_fmt(fmt) "ACPI: RHCT: " fmt #include <linux/acpi.h> +#include <linux/bits.h> -static struct acpi_table_header *acpi_get_rhct(void) +static struct acpi_table_rhct *acpi_get_rhct(void) { static struct acpi_table_header *rhct; acpi_status status; @@ -26,7 +27,7 @@ static struct acpi_table_header *acpi_get_rhct(void) } } - return rhct; + return (struct acpi_table_rhct *)rhct; } /* @@ -48,7 +49,7 @@ int acpi_get_riscv_isa(struct acpi_table_header *table, unsigned int cpu, const BUG_ON(acpi_disabled); if (!table) { - rhct = (struct acpi_table_rhct *)acpi_get_rhct(); + rhct = acpi_get_rhct(); if (!rhct) return -ENOENT; } else { @@ -81,3 +82,89 @@ int acpi_get_riscv_isa(struct acpi_table_header *table, unsigned int cpu, const return -1; } + +static void acpi_parse_hart_info_cmo_node(struct acpi_table_rhct *rhct, + struct acpi_rhct_hart_info *hart_info, + u32 *cbom_size, u32 *cboz_size, u32 *cbop_size) +{ + u32 size_hartinfo = sizeof(struct acpi_rhct_hart_info); + u32 size_hdr = sizeof(struct acpi_rhct_node_header); + struct acpi_rhct_node_header *ref_node; + struct acpi_rhct_cmo_node *cmo_node; + u32 *hart_info_node_offset; + + hart_info_node_offset = ACPI_ADD_PTR(u32, hart_info, size_hartinfo); + for 
(int i = 0; i < hart_info->num_offsets; i++) { + ref_node = ACPI_ADD_PTR(struct acpi_rhct_node_header, + rhct, hart_info_node_offset[i]); + if (ref_node->type == ACPI_RHCT_NODE_TYPE_CMO) { + cmo_node = ACPI_ADD_PTR(struct acpi_rhct_cmo_node, + ref_node, size_hdr); + if (cbom_size && cmo_node->cbom_size <= 30) { + if (!*cbom_size) + *cbom_size = BIT(cmo_node->cbom_size); + else if (*cbom_size != BIT(cmo_node->cbom_size)) + pr_warn("CBOM size is not the same across harts\n"); + } + + if (cboz_size && cmo_node->cboz_size <= 30) { + if (!*cboz_size) + *cboz_size = BIT(cmo_node->cboz_size); + else if (*cboz_size != BIT(cmo_node->cboz_size)) + pr_warn("CBOZ size is not the same across harts\n"); + } + + if (cbop_size && cmo_node->cbop_size <= 30) { + if (!*cbop_size) + *cbop_size = BIT(cmo_node->cbop_size); + else if (*cbop_size != BIT(cmo_node->cbop_size)) + pr_warn("CBOP size is not the same across harts\n"); + } + } + } +} + +/* + * During early boot, the caller should call acpi_get_table() and pass its pointer to + * these functions (and free up later). At run time, since this table can be used + * multiple times, pass NULL so that the table remains in memory. + */ +void acpi_get_cbo_block_size(struct acpi_table_header *table, u32 *cbom_size, + u32 *cboz_size, u32 *cbop_size) +{ + u32 size_hdr = sizeof(struct acpi_rhct_node_header); + struct acpi_rhct_node_header *node, *end; + struct acpi_rhct_hart_info *hart_info; + struct acpi_table_rhct *rhct; + + if (acpi_disabled) + return; + + if (table) { + rhct = (struct acpi_table_rhct *)table; + } else { + rhct = acpi_get_rhct(); + if (!rhct) + return; + } + + if (cbom_size) + *cbom_size = 0; + + if (cboz_size) + *cboz_size = 0; + + if (cbop_size) + *cbop_size = 0; + + end = ACPI_ADD_PTR(struct acpi_rhct_node_header, rhct, rhct->header.length); + for (node = ACPI_ADD_PTR(struct acpi_rhct_node_header, rhct, rhct->node_offset); + node < end; + node = ACPI_ADD_PTR(struct acpi_rhct_node_header, node, node->length)) { + if (node->type == ACPI_RHCT_NODE_TYPE_HART_INFO) { + hart_info = ACPI_ADD_PTR(struct acpi_rhct_hart_info, node, size_hdr); + acpi_parse_hart_info_cmo_node(rhct, hart_info, cbom_size, + cboz_size, cbop_size); + } + } +} diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 6fb4e8dc8c3c..09ed67772fae 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6180,24 +6180,10 @@ EXPORT_SYMBOL_GPL(ata_pci_remove_one); void ata_pci_shutdown_one(struct pci_dev *pdev) { struct ata_host *host = pci_get_drvdata(pdev); - struct ata_port *ap; - unsigned long flags; int i; - /* Tell EH to disable all devices */ - for (i = 0; i < host->n_ports; i++) { - ap = host->ports[i]; - spin_lock_irqsave(ap->lock, flags); - ap->pflags |= ATA_PFLAG_UNLOADING; - ata_port_schedule_eh(ap); - spin_unlock_irqrestore(ap->lock, flags); - } - for (i = 0; i < host->n_ports; i++) { - ap = host->ports[i]; - - /* Wait for EH to complete before freezing the port */ - ata_port_wait_eh(ap); + struct ata_port *ap = host->ports[i]; ap->pflags |= ATA_PFLAG_FROZEN; diff --git a/drivers/ata/pata_falcon.c b/drivers/ata/pata_falcon.c index 0c2ae430f5aa..18ceefd176df 100644 --- a/drivers/ata/pata_falcon.c +++ b/drivers/ata/pata_falcon.c @@ -121,7 +121,7 @@ static struct ata_port_operations pata_falcon_ops = { .set_mode = pata_falcon_set_mode, }; -static int __init pata_falcon_init_one(struct platform_device *pdev) +static int pata_falcon_init_one(struct platform_device *pdev) { struct resource *base_mem_res, *ctl_mem_res; struct resource *base_res, 
*ctl_res, *irq_res; @@ -216,23 +216,22 @@ static int __init pata_falcon_init_one(struct platform_device *pdev) IRQF_SHARED, &pata_falcon_sht); } -static int __exit pata_falcon_remove_one(struct platform_device *pdev) +static void pata_falcon_remove_one(struct platform_device *pdev) { struct ata_host *host = platform_get_drvdata(pdev); ata_host_detach(host); - - return 0; } static struct platform_driver pata_falcon_driver = { - .remove = __exit_p(pata_falcon_remove_one), + .probe = pata_falcon_init_one, + .remove_new = pata_falcon_remove_one, .driver = { .name = "atari-falcon-ide", }, }; -module_platform_driver_probe(pata_falcon_driver, pata_falcon_init_one); +module_platform_driver(pata_falcon_driver); MODULE_AUTHOR("Bartlomiej Zolnierkiewicz"); MODULE_DESCRIPTION("low-level driver for Atari Falcon PATA"); diff --git a/drivers/ata/pata_gayle.c b/drivers/ata/pata_gayle.c index 3bdbe2b65a2b..94df60ac2307 100644 --- a/drivers/ata/pata_gayle.c +++ b/drivers/ata/pata_gayle.c @@ -124,7 +124,7 @@ static struct ata_port_operations pata_gayle_a4000_ops = { .set_mode = pata_gayle_set_mode, }; -static int __init pata_gayle_init_one(struct platform_device *pdev) +static int pata_gayle_init_one(struct platform_device *pdev) { struct resource *res; struct gayle_ide_platform_data *pdata; @@ -193,23 +193,22 @@ static int __init pata_gayle_init_one(struct platform_device *pdev) return 0; } -static int __exit pata_gayle_remove_one(struct platform_device *pdev) +static void pata_gayle_remove_one(struct platform_device *pdev) { struct ata_host *host = platform_get_drvdata(pdev); ata_host_detach(host); - - return 0; } static struct platform_driver pata_gayle_driver = { - .remove = __exit_p(pata_gayle_remove_one), + .probe = pata_gayle_init_one, + .remove_new = pata_gayle_remove_one, .driver = { .name = "amiga-gayle-ide", }, }; -module_platform_driver_probe(pata_gayle_driver, pata_gayle_init_one); +module_platform_driver(pata_gayle_driver); MODULE_AUTHOR("Bartlomiej Zolnierkiewicz"); MODULE_DESCRIPTION("low-level driver for Amiga Gayle PATA"); diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 234a84ecde8b..ea6157747199 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1620,17 +1620,19 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, } if (!map->cache_bypass && map->format.parse_val) { - unsigned int ival; + unsigned int ival, offset; int val_bytes = map->format.val_bytes; - for (i = 0; i < val_len / val_bytes; i++) { - ival = map->format.parse_val(val + (i * val_bytes)); - ret = regcache_write(map, - reg + regmap_get_offset(map, i), - ival); + + /* Cache the last written value for noinc writes */ + i = noinc ? val_len - val_bytes : 0; + for (; i < val_len; i += val_bytes) { + ival = map->format.parse_val(val + i); + offset = noinc ? 
0 : regmap_get_offset(map, i / val_bytes); + ret = regcache_write(map, reg + offset, ival); if (ret) { dev_err(map->dev, "Error in caching of register: %x ret: %d\n", - reg + regmap_get_offset(map, i), ret); + reg + offset, ret); return ret; } } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 800f131222fc..855fdf5c3b4e 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -250,7 +250,6 @@ static void nbd_dev_remove(struct nbd_device *nbd) struct gendisk *disk = nbd->disk; del_gendisk(disk); - put_disk(disk); blk_mq_free_tag_set(&nbd->tag_set); /* @@ -261,7 +260,7 @@ static void nbd_dev_remove(struct nbd_device *nbd) idr_remove(&nbd_index_idr, nbd->index); mutex_unlock(&nbd_index_mutex); destroy_workqueue(nbd->recv_workq); - kfree(nbd); + put_disk(disk); } static void nbd_dev_remove_work(struct work_struct *work) @@ -1608,6 +1607,13 @@ static void nbd_release(struct gendisk *disk) nbd_put(nbd); } +static void nbd_free_disk(struct gendisk *disk) +{ + struct nbd_device *nbd = disk->private_data; + + kfree(nbd); +} + static const struct block_device_operations nbd_fops = { .owner = THIS_MODULE, @@ -1615,6 +1621,7 @@ static const struct block_device_operations nbd_fops = .release = nbd_release, .ioctl = nbd_ioctl, .compat_ioctl = nbd_ioctl, + .free_disk = nbd_free_disk, }; #if IS_ENABLED(CONFIG_DEBUG_FS) diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index 50198657230e..57857c0dfba9 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -22,21 +22,32 @@ #include <linux/io-64-nonatomic-lo-hi.h> #include <linux/interrupt.h> #include <linux/of_irq.h> +#include <linux/limits.h> #include <clocksource/timer-riscv.h> #include <asm/smp.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #include <asm/sbi.h> #include <asm/timex.h> static DEFINE_STATIC_KEY_FALSE(riscv_sstc_available); static bool riscv_timer_cannot_wake_cpu; +static void riscv_clock_event_stop(void) +{ + if (static_branch_likely(&riscv_sstc_available)) { + csr_write(CSR_STIMECMP, ULONG_MAX); + if (IS_ENABLED(CONFIG_32BIT)) + csr_write(CSR_STIMECMPH, ULONG_MAX); + } else { + sbi_set_timer(U64_MAX); + } +} + static int riscv_clock_next_event(unsigned long delta, struct clock_event_device *ce) { u64 next_tval = get_cycles64() + delta; - csr_set(CSR_IE, IE_TIE); if (static_branch_likely(&riscv_sstc_available)) { #if defined(CONFIG_32BIT) csr_write(CSR_STIMECMP, next_tval & 0xFFFFFFFF); @@ -94,6 +105,8 @@ static int riscv_timer_starting_cpu(unsigned int cpu) ce->irq = riscv_clock_event_irq; if (riscv_timer_cannot_wake_cpu) ce->features |= CLOCK_EVT_FEAT_C3STOP; + if (static_branch_likely(&riscv_sstc_available)) + ce->rating = 450; clockevents_config_and_register(ce, riscv_timebase, 100, 0x7fffffff); enable_percpu_irq(riscv_clock_event_irq, @@ -119,7 +132,7 @@ static irqreturn_t riscv_timer_interrupt(int irq, void *dev_id) { struct clock_event_device *evdev = this_cpu_ptr(&riscv_clock_event); - csr_clear(CSR_IE, IE_TIE); + riscv_clock_event_stop(); evdev->event_handler(evdev); return IRQ_HANDLED; diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 11b3e34b7696..bd1e1357cef8 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -180,8 +180,11 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "ti,am62a7", }, { .compatible = "ti,am62p5", }, + { .compatible = "qcom,ipq5332", }, { .compatible = "qcom,ipq6018", }, { .compatible = 
"qcom,ipq8064", }, + { .compatible = "qcom,ipq8074", }, + { .compatible = "qcom,ipq9574", }, { .compatible = "qcom,apq8064", }, { .compatible = "qcom,msm8974", }, { .compatible = "qcom,msm8960", }, diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index 15367ac08b2b..6355a39418c5 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -38,6 +38,11 @@ enum ipq806x_versions { #define IPQ6000_VERSION BIT(2) +enum ipq8074_versions { + IPQ8074_HAWKEYE_VERSION = 0, + IPQ8074_ACORN_VERSION, +}; + struct qcom_cpufreq_drv; struct qcom_cpufreq_match_data { @@ -178,6 +183,16 @@ static int qcom_cpufreq_kryo_name_version(struct device *cpu_dev, switch (msm_id) { case QCOM_ID_MSM8996: case QCOM_ID_APQ8096: + case QCOM_ID_IPQ5332: + case QCOM_ID_IPQ5322: + case QCOM_ID_IPQ5312: + case QCOM_ID_IPQ5302: + case QCOM_ID_IPQ5300: + case QCOM_ID_IPQ9514: + case QCOM_ID_IPQ9550: + case QCOM_ID_IPQ9554: + case QCOM_ID_IPQ9570: + case QCOM_ID_IPQ9574: drv->versions = 1 << (unsigned int)(*speedbin); break; case QCOM_ID_MSM8996SG: @@ -338,6 +353,44 @@ static int qcom_cpufreq_ipq6018_name_version(struct device *cpu_dev, return 0; } +static int qcom_cpufreq_ipq8074_name_version(struct device *cpu_dev, + struct nvmem_cell *speedbin_nvmem, + char **pvs_name, + struct qcom_cpufreq_drv *drv) +{ + u32 msm_id; + int ret; + *pvs_name = NULL; + + ret = qcom_smem_get_soc_id(&msm_id); + if (ret) + return ret; + + switch (msm_id) { + case QCOM_ID_IPQ8070A: + case QCOM_ID_IPQ8071A: + case QCOM_ID_IPQ8172: + case QCOM_ID_IPQ8173: + case QCOM_ID_IPQ8174: + drv->versions = BIT(IPQ8074_ACORN_VERSION); + break; + case QCOM_ID_IPQ8072A: + case QCOM_ID_IPQ8074A: + case QCOM_ID_IPQ8076A: + case QCOM_ID_IPQ8078A: + drv->versions = BIT(IPQ8074_HAWKEYE_VERSION); + break; + default: + dev_err(cpu_dev, + "SoC ID %u is not part of IPQ8074 family, limiting to 1.4GHz!\n", + msm_id); + drv->versions = BIT(IPQ8074_ACORN_VERSION); + break; + } + + return 0; +} + static const char *generic_genpd_names[] = { "perf", NULL }; static const struct qcom_cpufreq_match_data match_data_kryo = { @@ -367,6 +420,10 @@ static const struct qcom_cpufreq_match_data match_data_ipq8064 = { .get_version = qcom_cpufreq_ipq8064_name_version, }; +static const struct qcom_cpufreq_match_data match_data_ipq8074 = { + .get_version = qcom_cpufreq_ipq8074_name_version, +}; + static int qcom_cpufreq_probe(struct platform_device *pdev) { struct qcom_cpufreq_drv *drv; @@ -494,9 +551,12 @@ static const struct of_device_id qcom_cpufreq_match_list[] __initconst = { { .compatible = "qcom,msm8909", .data = &match_data_msm8909 }, { .compatible = "qcom,msm8996", .data = &match_data_kryo }, { .compatible = "qcom,qcs404", .data = &match_data_qcs404 }, + { .compatible = "qcom,ipq5332", .data = &match_data_kryo }, { .compatible = "qcom,ipq6018", .data = &match_data_ipq6018 }, { .compatible = "qcom,ipq8064", .data = &match_data_ipq8064 }, + { .compatible = "qcom,ipq8074", .data = &match_data_ipq8074 }, { .compatible = "qcom,apq8064", .data = &match_data_krait }, + { .compatible = "qcom,ipq9574", .data = &match_data_kryo }, { .compatible = "qcom,msm8974", .data = &match_data_krait }, { .compatible = "qcom,msm8960", .data = &match_data_krait }, {}, diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index ef4c12f0877b..06964a3c130f 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -28,7 +28,7 @@ cflags-$(CONFIG_ARM) += 
-DEFI_HAVE_STRLEN -DEFI_HAVE_STRNLEN \ -DEFI_HAVE_MEMCHR -DEFI_HAVE_STRRCHR \ -DEFI_HAVE_STRCMP -fno-builtin -fpic \ $(call cc-option,-mno-single-pic-base) -cflags-$(CONFIG_RISCV) += -fpic +cflags-$(CONFIG_RISCV) += -fpic -DNO_ALTERNATIVE cflags-$(CONFIG_LOONGARCH) += -fpie cflags-$(CONFIG_EFI_PARAMS_FROM_FDT) += -I$(srctree)/scripts/dtc/libfdt diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c index 58f107194fda..04c03402db6d 100644 --- a/drivers/gpio/gpio-aspeed.c +++ b/drivers/gpio/gpio-aspeed.c @@ -750,12 +750,12 @@ static int aspeed_gpio_request(struct gpio_chip *chip, unsigned int offset) if (!have_gpio(gpiochip_get_data(chip), offset)) return -ENODEV; - return pinctrl_gpio_request(chip->base + offset); + return pinctrl_gpio_request(chip, offset); } static void aspeed_gpio_free(struct gpio_chip *chip, unsigned int offset) { - pinctrl_gpio_free(chip->base + offset); + pinctrl_gpio_free(chip, offset); } static int usecs_to_cycles(struct aspeed_gpio *gpio, unsigned long usecs, @@ -973,7 +973,7 @@ static int aspeed_gpio_set_config(struct gpio_chip *chip, unsigned int offset, else if (param == PIN_CONFIG_BIAS_DISABLE || param == PIN_CONFIG_BIAS_PULL_DOWN || param == PIN_CONFIG_DRIVE_STRENGTH) - return pinctrl_gpio_set_config(chip->base + offset, config); + return pinctrl_gpio_set_config(chip, offset, config); else if (param == PIN_CONFIG_DRIVE_OPEN_DRAIN || param == PIN_CONFIG_DRIVE_OPEN_SOURCE) /* Return -ENOTSUPP to trigger emulation, as per datasheet */ diff --git a/drivers/gpio/gpio-em.c b/drivers/gpio/gpio-em.c index 858e6ebbb584..6c862c572322 100644 --- a/drivers/gpio/gpio-em.c +++ b/drivers/gpio/gpio-em.c @@ -227,14 +227,9 @@ static int em_gio_to_irq(struct gpio_chip *chip, unsigned offset) return irq_create_mapping(gpio_to_priv(chip)->irq_domain, offset); } -static int em_gio_request(struct gpio_chip *chip, unsigned offset) -{ - return pinctrl_gpio_request(chip->base + offset); -} - static void em_gio_free(struct gpio_chip *chip, unsigned offset) { - pinctrl_gpio_free(chip->base + offset); + pinctrl_gpio_free(chip, offset); /* Set the GPIO as an input to ensure that the next GPIO request won't * drive the GPIO pin as an output. 
@@ -311,7 +306,7 @@ static int em_gio_probe(struct platform_device *pdev) gpio_chip->direction_output = em_gio_direction_output; gpio_chip->set = em_gio_set; gpio_chip->to_irq = em_gio_to_irq; - gpio_chip->request = em_gio_request; + gpio_chip->request = pinctrl_gpio_request; gpio_chip->free = em_gio_free; gpio_chip->label = name; gpio_chip->parent = dev; diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index 8f80ca8ec1ed..a13f3c18ccd4 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -346,7 +346,7 @@ static int mvebu_gpio_direction_input(struct gpio_chip *chip, unsigned int pin) * Check with the pinctrl driver whether this pin is usable as * an input GPIO */ - ret = pinctrl_gpio_direction_input(chip->base + pin); + ret = pinctrl_gpio_direction_input(chip, pin); if (ret) return ret; @@ -366,7 +366,7 @@ static int mvebu_gpio_direction_output(struct gpio_chip *chip, unsigned int pin, * Check with the pinctrl driver whether this pin is usable as * an output GPIO */ - ret = pinctrl_gpio_direction_output(chip->base + pin); + ret = pinctrl_gpio_direction_output(chip, pin); if (ret) return ret; @@ -757,7 +757,6 @@ static const struct pwm_ops mvebu_pwm_ops = { .free = mvebu_pwm_free, .get_state = mvebu_pwm_get_state, .apply = mvebu_pwm_apply, - .owner = THIS_MODULE, }; static void __maybe_unused mvebu_pwm_suspend(struct mvebu_gpio_chip *mvchip) diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c index cae9661862fe..91cea97255fa 100644 --- a/drivers/gpio/gpio-pxa.c +++ b/drivers/gpio/gpio-pxa.c @@ -260,7 +260,7 @@ static int pxa_gpio_direction_input(struct gpio_chip *chip, unsigned offset) int ret; if (pxa_gpio_has_pinctrl()) { - ret = pinctrl_gpio_direction_input(chip->base + offset); + ret = pinctrl_gpio_direction_input(chip, offset); if (ret) return ret; } @@ -289,7 +289,7 @@ static int pxa_gpio_direction_output(struct gpio_chip *chip, writel_relaxed(mask, base + (value ? 
GPSR_OFFSET : GPCR_OFFSET)); if (pxa_gpio_has_pinctrl()) { - ret = pinctrl_gpio_direction_output(chip->base + offset); + ret = pinctrl_gpio_direction_output(chip, offset); if (ret) return ret; } diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c index d8b1baae6357..6159fda38d5d 100644 --- a/drivers/gpio/gpio-rcar.c +++ b/drivers/gpio/gpio-rcar.c @@ -275,7 +275,7 @@ static int gpio_rcar_request(struct gpio_chip *chip, unsigned offset) return error; } - error = pinctrl_gpio_request(chip->base + offset); + error = pinctrl_gpio_request(chip, offset); if (error) pm_runtime_put(p->dev); @@ -286,7 +286,7 @@ static void gpio_rcar_free(struct gpio_chip *chip, unsigned offset) { struct gpio_rcar_priv *p = gpiochip_get_data(chip); - pinctrl_gpio_free(chip->base + offset); + pinctrl_gpio_free(chip, offset); /* * Set the GPIO as an input to ensure that the next GPIO request won't diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c index 23040a8cea34..0bd339813110 100644 --- a/drivers/gpio/gpio-rockchip.c +++ b/drivers/gpio/gpio-rockchip.c @@ -159,9 +159,9 @@ static int rockchip_gpio_set_direction(struct gpio_chip *chip, if (input) - pinctrl_gpio_direction_input(bank->pin_base + offset); + pinctrl_gpio_direction_input(chip, offset); else - pinctrl_gpio_direction_output(bank->pin_base + offset); + pinctrl_gpio_direction_output(chip, offset); raw_spin_lock_irqsave(&bank->slock, flags); rockchip_gpio_writel_bit(bank, offset, data, bank->gpio_regs->port_ddr); diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c index ea715582bcf3..ea5f9cc14bc4 100644 --- a/drivers/gpio/gpio-tegra.c +++ b/drivers/gpio/gpio-tegra.c @@ -137,16 +137,11 @@ static void tegra_gpio_disable(struct tegra_gpio_info *tgi, unsigned int gpio) tegra_gpio_mask_write(tgi, GPIO_MSK_CNF(tgi, gpio), gpio, 0); } -static int tegra_gpio_request(struct gpio_chip *chip, unsigned int offset) -{ - return pinctrl_gpio_request(chip->base + offset); -} - static void tegra_gpio_free(struct gpio_chip *chip, unsigned int offset) { struct tegra_gpio_info *tgi = gpiochip_get_data(chip); - pinctrl_gpio_free(chip->base + offset); + pinctrl_gpio_free(chip, offset); tegra_gpio_disable(tgi, offset); } @@ -179,7 +174,7 @@ static int tegra_gpio_direction_input(struct gpio_chip *chip, tegra_gpio_mask_write(tgi, GPIO_MSK_OE(tgi, offset), offset, 0); tegra_gpio_enable(tgi, offset); - ret = pinctrl_gpio_direction_input(chip->base + offset); + ret = pinctrl_gpio_direction_input(chip, offset); if (ret < 0) dev_err(tgi->dev, "Failed to set pinctrl input direction of GPIO %d: %d", @@ -199,7 +194,7 @@ static int tegra_gpio_direction_output(struct gpio_chip *chip, tegra_gpio_mask_write(tgi, GPIO_MSK_OE(tgi, offset), offset, 1); tegra_gpio_enable(tgi, offset); - ret = pinctrl_gpio_direction_output(chip->base + offset); + ret = pinctrl_gpio_direction_output(chip, offset); if (ret < 0) dev_err(tgi->dev, "Failed to set pinctrl output direction of GPIO %d: %d", @@ -717,7 +712,7 @@ static int tegra_gpio_probe(struct platform_device *pdev) } tgi->gc.label = "tegra-gpio"; - tgi->gc.request = tegra_gpio_request; + tgi->gc.request = pinctrl_gpio_request; tgi->gc.free = tegra_gpio_free; tgi->gc.direction_input = tegra_gpio_direction_input; tgi->gc.get = tegra_gpio_get; diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index 444501c56a3b..07e5e6323e86 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -130,7 +130,7 @@ static int vf610_gpio_direction_input(struct gpio_chip *chip, unsigned gpio) 
vf610_gpio_writel(val, port->gpio_base + GPIO_PDDR); } - return pinctrl_gpio_direction_input(chip->base + gpio); + return pinctrl_gpio_direction_input(chip, gpio); } static int vf610_gpio_direction_output(struct gpio_chip *chip, unsigned gpio, @@ -148,7 +148,7 @@ static int vf610_gpio_direction_output(struct gpio_chip *chip, unsigned gpio, vf610_gpio_writel(val, port->gpio_base + GPIO_PDDR); } - return pinctrl_gpio_direction_output(chip->base + gpio); + return pinctrl_gpio_direction_output(chip, gpio); } static void vf610_gpio_irq_handler(struct irq_desc *desc) diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index 31fc71a612c2..02ffda6c1e51 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -2287,8 +2287,7 @@ static void gpio_desc_to_lineinfo(struct gpio_desc *desc, * FIXME: find a non-racy way to retrieve this information. Maybe a * lock common to both frameworks? */ - ok_for_pinctrl = - pinctrl_gpio_can_use_line(gc->base + info->offset); + ok_for_pinctrl = pinctrl_gpio_can_use_line(gc, info->offset); spin_lock_irqsave(&gpio_lock, flags); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index cbafcd95243e..95d2a7b2ea3e 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1092,28 +1092,6 @@ void gpiochip_remove(struct gpio_chip *gc) } EXPORT_SYMBOL_GPL(gpiochip_remove); -/* - * FIXME: This will be removed soon. - * - * This function is deprecated, don't use. - */ -struct gpio_chip *gpiochip_find(void *data, - int (*match)(struct gpio_chip *gc, - void *data)) -{ - struct gpio_device *gdev; - struct gpio_chip *gc = NULL; - - gdev = gpio_device_find(data, match); - if (gdev) { - gc = gdev->chip; - gpio_device_put(gdev); - } - - return gc; -} -EXPORT_SYMBOL_GPL(gpiochip_find); - /** * gpio_device_find() - find a specific GPIO device * @data: data to pass to match function @@ -2036,7 +2014,7 @@ int gpiochip_generic_request(struct gpio_chip *gc, unsigned int offset) return 0; #endif - return pinctrl_gpio_request(gc->gpiodev->base + offset); + return pinctrl_gpio_request(gc, offset); } EXPORT_SYMBOL_GPL(gpiochip_generic_request); @@ -2052,7 +2030,7 @@ void gpiochip_generic_free(struct gpio_chip *gc, unsigned int offset) return; #endif - pinctrl_gpio_free(gc->gpiodev->base + offset); + pinctrl_gpio_free(gc, offset); } EXPORT_SYMBOL_GPL(gpiochip_generic_free); @@ -2065,7 +2043,7 @@ EXPORT_SYMBOL_GPL(gpiochip_generic_free); int gpiochip_generic_config(struct gpio_chip *gc, unsigned int offset, unsigned long config) { - return pinctrl_gpio_set_config(gc->gpiodev->base + offset, config); + return pinctrl_gpio_set_config(gc, offset, config); } EXPORT_SYMBOL_GPL(gpiochip_generic_config); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 91820838b63b..afec09930efa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -363,9 +363,6 @@ struct amdgpu_ip_block_version { const struct amd_ip_funcs *funcs; }; -#define HW_REV(_Major, _Minor, _Rev) \ ((((uint32_t) (_Major)) << 16) | ((uint32_t) (_Minor) << 8) | ((uint32_t) (_Rev))) struct amdgpu_ip_block { struct amdgpu_ip_block_status status; const struct amdgpu_ip_block_version *version; @@ -1162,11 +1159,18 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t acc_flags); u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, u64 reg_addr); +uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t acc_flags, + uint32_t xcc_id); void
amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags); void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev, u64 reg_addr, u32 reg_data); +void amdgpu_device_xcc_wreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t v, + uint32_t acc_flags, + uint32_t xcc_id); void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id); void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value); @@ -1207,8 +1211,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) #define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) -#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg)) -#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v)) +#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0) +#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0) #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg)) #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v)) @@ -1218,6 +1222,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0) #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) +#define RREG32_XCC(reg, inst) amdgpu_device_xcc_rreg(adev, (reg), 0, inst) +#define WREG32_XCC(reg, v, inst) amdgpu_device_xcc_wreg(adev, (reg), (v), 0, inst) #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg)) #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v)) #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 4da82fc64fef..2deebece810e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1494,6 +1494,9 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) if (adev->asic_type < CHIP_RAVEN) return false; + if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) + return false; + /* * If ACPI_FADT_LOW_POWER_S0 is not set in the FADT, it is generally * risky to do any special firmware-related preparations for entering diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index 490c8f5ddb60..f6598b9e4faa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -300,14 +300,13 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI); for (reg = hqd_base; reg <= hqd_end; reg++) - WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); + WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst); /* Activate doorbell logic before triggering WPTR poll. 
*/ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL), - data); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL, data); if (wptr) { /* Don't read wptr with get_user because the user @@ -336,27 +335,24 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO), - lower_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI), - upper_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR), - lower_32_bits((uintptr_t)wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), - regCP_HQD_PQ_WPTR_POLL_ADDR_HI), + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO, + lower_32_bits(guessed_wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI, + upper_32_bits(guessed_wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR, + lower_32_bits((uintptr_t)wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR_HI, upper_32_bits((uintptr_t)wptr)); - WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1), - (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, - queue_id)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1, + (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR), - REG_SET_FIELD(m->cp_hqd_eop_rptr, - CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR, + REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), data); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE, data); kgd_gfx_v9_release_queue(adev, inst); @@ -494,15 +490,15 @@ static uint32_t kgd_gfx_v9_4_3_set_address_watch( VALID, 1); - WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), + WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regTCP_WATCH0_ADDR_H) + (watch_id * TCP_WATCH_STRIDE)), - watch_address_high); + watch_address_high, inst); - WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), + WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regTCP_WATCH0_ADDR_L) + (watch_id * TCP_WATCH_STRIDE)), - watch_address_low); + watch_address_low, inst); return watch_address_cntl; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 51011e8ee90d..00fbc0f44c92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -91,8 +91,8 @@ void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi { kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_CONFIG), sh_mem_config); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_BASES), sh_mem_bases); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmSH_MEM_CONFIG, sh_mem_config); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmSH_MEM_BASES, sh_mem_bases); /* APE1 no longer exists on GFX9 */ 
kgd_gfx_v9_unlock_srbm(adev, inst); @@ -239,14 +239,13 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, for (reg = hqd_base; reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++) - WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); + WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst); /* Activate doorbell logic before triggering WPTR poll. */ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL), - data); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL, data); if (wptr) { /* Don't read wptr with get_user because the user @@ -275,25 +274,24 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_LO), - lower_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI), - upper_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR), - lower_32_bits((uintptr_t)wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), - upper_32_bits((uintptr_t)wptr)); - WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_PQ_WPTR_POLL_CNTL1, - (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_LO, + lower_32_bits(guessed_wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI, + upper_32_bits(guessed_wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR, + lower_32_bits((uintptr_t)wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, + upper_32_bits((uintptr_t)wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_PQ_WPTR_POLL_CNTL1, + (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_EOP_RPTR), - REG_SET_FIELD(m->cp_hqd_eop_rptr, - CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_EOP_RPTR, + REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE), data); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE, data); kgd_gfx_v9_release_queue(adev, inst); @@ -556,7 +554,7 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd, break; } - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_DEQUEUE_REQUEST), type); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_DEQUEUE_REQUEST, type); end_jiffies = (utimeout * HZ / 1000) + jiffies; while (true) { @@ -908,8 +906,8 @@ void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, uint32_t inst) { - *wait_times = RREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), - mmCP_IQ_WAIT_TIME2)); + *wait_times = RREG32_SOC15_RLC(GC, GET_INST(GC, inst), + mmCP_IQ_WAIT_TIME2); } void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 1eccad4ce243..41fbc4fd0fac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -425,6 +425,32 @@ validate_fail: 
return ret; } +static int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, + uint32_t domain, + struct dma_fence *fence) +{ + int ret = amdgpu_bo_reserve(bo, false); + + if (ret) + return ret; + + ret = amdgpu_amdkfd_bo_validate(bo, domain, true); + if (ret) + goto unreserve_out; + + ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1); + if (ret) + goto unreserve_out; + + dma_resv_add_fence(bo->tbo.base.resv, fence, + DMA_RESV_USAGE_BOOKKEEP); + +unreserve_out: + amdgpu_bo_unreserve(bo); + + return ret; +} + static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo) { return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false); @@ -1784,6 +1810,15 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( } bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; + } else { + mutex_lock(&avm->process_info->lock); + if (avm->process_info->eviction_fence && + !dma_fence_is_signaled(&avm->process_info->eviction_fence->base)) + ret = amdgpu_amdkfd_bo_validate_and_fence(bo, domain, + &avm->process_info->eviction_fence->base); + mutex_unlock(&avm->process_info->lock); + if (ret) + goto err_validate_bo; } if (offset) @@ -1793,6 +1828,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( allocate_init_user_pages_failed: err_pin_bo: +err_validate_bo: remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); drm_vma_node_revoke(&gobj->vma_node, drm_priv); err_node_allow: @@ -1866,10 +1902,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( if (unlikely(ret)) return ret; - /* The eviction fence should be removed by the last unmap. - * TODO: Log an error condition if the bo still has the eviction fence - * attached - */ amdgpu_amdkfd_remove_eviction_fence(mem->bo, process_info->eviction_fence); pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va, @@ -1998,19 +2030,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( if (unlikely(ret)) goto out_unreserve; - if (mem->mapped_to_gpu_memory == 0 && - !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { - /* Validate BO only once. The eviction fence gets added to BO - * the first time it is mapped. Validate will wait for all - * background evictions to complete. - */ - ret = amdgpu_amdkfd_bo_validate(bo, domain, true); - if (ret) { - pr_debug("Validate failed\n"); - goto out_unreserve; - } - } - list_for_each_entry(entry, &mem->attachments, list) { if (entry->bo_va->base.vm != avm || entry->is_mapped) continue; @@ -2037,10 +2056,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( mem->mapped_to_gpu_memory); } - if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count) - dma_resv_add_fence(bo->tbo.base.resv, - &avm->process_info->eviction_fence->base, - DMA_RESV_USAGE_BOOKKEEP); ret = unreserve_bo_and_vms(&ctx, false, false); goto out; @@ -2074,7 +2089,6 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv) { struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); - struct amdkfd_process_info *process_info = avm->process_info; unsigned long bo_size = mem->bo->tbo.base.size; struct kfd_mem_attachment *entry; struct bo_vm_reservation_context ctx; @@ -2115,15 +2129,6 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( mem->mapped_to_gpu_memory); } - /* If BO is unmapped from all VMs, unfence it. It can be evicted if - * required. 
- */ - if (mem->mapped_to_gpu_memory == 0 && - !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && - !mem->bo->tbo.pin_count) - amdgpu_amdkfd_remove_eviction_fence(mem->bo, - process_info->eviction_fence); - unreserve_out: unreserve_bo_and_vms(&ctx, false, false); out: @@ -2351,8 +2356,20 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, amdgpu_sync_create(&(*mem)->sync); (*mem)->is_imported = true; + mutex_lock(&avm->process_info->lock); + if (avm->process_info->eviction_fence && + !dma_fence_is_signaled(&avm->process_info->eviction_fence->base)) + ret = amdgpu_amdkfd_bo_validate_and_fence(bo, (*mem)->domain, + &avm->process_info->eviction_fence->base); + mutex_unlock(&avm->process_info->lock); + if (ret) + goto err_remove_mem; + return 0; +err_remove_mem: + remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); + drm_vma_node_revoke(&obj->vma_node, drm_priv); err_free_mem: kfree(*mem); err_put_obj: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 5bbb23e102ba..618e469e3622 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -29,6 +29,7 @@ #include "amdgpu.h" #include "atom.h" +#include <linux/device.h> #include <linux/pci.h> #include <linux/slab.h> #include <linux/acpi.h> @@ -287,6 +288,10 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev) if (adev->flags & AMD_IS_APU) return false; + /* ATRM is for on-platform devices only */ + if (dev_is_removable(&adev->pdev->dev)) + return false; + while ((pdev = pci_get_base_class(PCI_BASE_CLASS_DISPLAY, pdev))) { if ((pdev->class != PCI_CLASS_DISPLAY_VGA << 8) && (pdev->class != PCI_CLASS_DISPLAY_OTHER << 8)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 781e5c5ce04d..702f6610d024 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -172,6 +172,7 @@ int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id, } rcu_read_unlock(); + *result = NULL; return -ENOENT; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e210fe5c22a0..df3ecfa9e13f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1117,6 +1117,11 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) return r; } + /* FIXME: In theory this loop shouldn't be needed any more when + * amdgpu_vm_handle_moved handles all moved BOs that are reserved + * with p->ticket. But removing it caused test regressions, so I'm + * leaving it here for now. 
+ */ amdgpu_bo_list_for_each_entry(e, p->bo_list) { bo_va = e->bo_va; if (bo_va == NULL) @@ -1131,7 +1136,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) return r; } - r = amdgpu_vm_handle_moved(adev, vm); + r = amdgpu_vm_handle_moved(adev, vm, &p->exec.ticket); if (r) return r; @@ -1410,7 +1415,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r == -ENOMEM) DRM_ERROR("Not enough memory for command submission!\n"); else if (r != -ERESTARTSYS && r != -EAGAIN) - DRM_ERROR("Failed to process the buffer list %d!\n", r); + DRM_DEBUG("Failed to process the buffer list %d!\n", r); goto error_fini; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d5f78179b2b6..7eeaf0aa7f81 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -41,6 +41,7 @@ #include <drm/drm_fb_helper.h> #include <drm/drm_probe_helper.h> #include <drm/amdgpu_drm.h> +#include <linux/device.h> #include <linux/vgaarb.h> #include <linux/vga_switcheroo.h> #include <linux/efi.h> @@ -72,6 +73,7 @@ #include "amdgpu_pmu.h" #include "amdgpu_fru_eeprom.h" #include "amdgpu_reset.h" +#include "amdgpu_virt.h" #include <linux/suspend.h> #include <drm/task_barrier.h> @@ -471,7 +473,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) && down_read_trylock(&adev->reset_domain->sem)) { - ret = amdgpu_kiq_rreg(adev, reg); + ret = amdgpu_kiq_rreg(adev, reg, 0); up_read(&adev->reset_domain->sem); } else { ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); @@ -508,6 +510,49 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) BUG(); } + +/** + * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC + * + * @adev: amdgpu_device pointer + * @reg: dword aligned register offset + * @acc_flags: access flags which require special behavior + * @xcc_id: xcc accelerated compute core id + * + * Returns the 32 bit value from the offset specified. 
+ */ +uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t acc_flags, + uint32_t xcc_id) +{ + uint32_t ret, rlcg_flag; + + if (amdgpu_device_skip_hw_access(adev)) + return 0; + + if ((reg * 4) < adev->rmmio_size) { + if (amdgpu_sriov_vf(adev) && + !amdgpu_sriov_runtime(adev) && + adev->gfx.rlc.rlcg_reg_access_supported && + amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, + GC_HWIP, false, + &rlcg_flag)) { + ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id); + } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && + amdgpu_sriov_runtime(adev) && + down_read_trylock(&adev->reset_domain->sem)) { + ret = amdgpu_kiq_rreg(adev, reg, xcc_id); + up_read(&adev->reset_domain->sem); + } else { + ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); + } + } else { + ret = adev->pcie_rreg(adev, reg * 4); + } + + return ret; +} + /* * MMIO register write with bytes helper functions * @offset:bytes offset from MMIO start @@ -555,7 +600,7 @@ void amdgpu_device_wreg(struct amdgpu_device *adev, if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) && down_read_trylock(&adev->reset_domain->sem)) { - amdgpu_kiq_wreg(adev, reg, v); + amdgpu_kiq_wreg(adev, reg, v, 0); up_read(&adev->reset_domain->sem); } else { writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); @@ -597,6 +642,47 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, } /** + * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC + * + * @adev: amdgpu_device pointer + * @reg: dword aligned register offset + * @v: 32 bit value to write to the register + * @acc_flags: access flags which require special behavior + * @xcc_id: xcc accelerated compute core id + * + * Writes the value specified to the offset specified. 
+ */ +void amdgpu_device_xcc_wreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t v, + uint32_t acc_flags, uint32_t xcc_id) +{ + uint32_t rlcg_flag; + + if (amdgpu_device_skip_hw_access(adev)) + return; + + if ((reg * 4) < adev->rmmio_size) { + if (amdgpu_sriov_vf(adev) && + !amdgpu_sriov_runtime(adev) && + adev->gfx.rlc.rlcg_reg_access_supported && + amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, + GC_HWIP, true, + &rlcg_flag)) { + amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id); + } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && + amdgpu_sriov_runtime(adev) && + down_read_trylock(&adev->reset_domain->sem)) { + amdgpu_kiq_wreg(adev, reg, v, xcc_id); + up_read(&adev->reset_domain->sem); + } else { + writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); + } + } else { + adev->pcie_wreg(adev, reg * 4, v); + } +} + +/** * amdgpu_device_indirect_rreg - read an indirect register * * @adev: amdgpu_device pointer @@ -1073,6 +1159,8 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev) amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) { amdgpu_psp_wait_for_bootloader(adev); ret = amdgpu_atomfirmware_asic_init(adev, true); + /* TODO: check the return val and stop device initialization if boot fails */ + amdgpu_psp_query_boot_status(adev); return ret; } else { return amdgpu_atom_asic_init(adev->mode_info.atom_context); @@ -2223,7 +2311,6 @@ out: */ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) { - struct drm_device *dev = adev_to_drm(adev); struct pci_dev *parent; int i, r; bool total; @@ -2294,7 +2381,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) (amdgpu_is_atpx_hybrid() || amdgpu_has_atpx_dgpu_power_cntl()) && ((adev->flags & AMD_IS_APU) == 0) && - !pci_is_thunderbolt_attached(to_pci_dev(dev->dev))) + !dev_is_removable(&adev->pdev->dev)) adev->flags |= AMD_IS_PX; if (!(adev->flags & AMD_IS_APU)) { @@ -2497,6 +2584,18 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) ring->name); return r; } + r = amdgpu_uvd_entity_init(adev, ring); + if (r) { + DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n", + ring->name); + return r; + } + r = amdgpu_vce_entity_init(adev, ring); + if (r) { + DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n", + ring->name); + return r; + } } amdgpu_xcp_update_partition_sched_list(adev); @@ -3962,13 +4061,23 @@ int amdgpu_device_init(struct amdgpu_device *adev, } } } else { - tmp = amdgpu_reset_method; - /* It should do a default reset when loading or reloading the driver, - * regardless of the module parameter reset_method. - */ - amdgpu_reset_method = AMD_RESET_METHOD_NONE; - r = amdgpu_asic_reset(adev); - amdgpu_reset_method = tmp; + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { + case IP_VERSION(13, 0, 0): + case IP_VERSION(13, 0, 7): + case IP_VERSION(13, 0, 10): + r = psp_gpu_reset(adev); + break; + default: + tmp = amdgpu_reset_method; + /* It should do a default reset when loading or reloading the driver, + * regardless of the module parameter reset_method. 
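+ * (amdgpu_reset_method is saved in tmp and restored right after
+ * the forced default reset below has run.)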
+ */ + amdgpu_reset_method = AMD_RESET_METHOD_NONE; + r = amdgpu_asic_reset(adev); + amdgpu_reset_method = tmp; + break; + } + if (r) { dev_err(adev->dev, "asic reset on init failed\n"); goto failed; @@ -4132,7 +4241,7 @@ fence_driver_init: px = amdgpu_device_supports_px(ddev); - if (px || (!pci_is_thunderbolt_attached(adev->pdev) && + if (px || (!dev_is_removable(&adev->pdev->dev) && apple_gmux_detect(NULL, NULL))) vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, px); @@ -4282,7 +4391,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) px = amdgpu_device_supports_px(adev_to_drm(adev)); - if (px || (!pci_is_thunderbolt_attached(adev->pdev) && + if (px || (!dev_is_removable(&adev->pdev->dev) && apple_gmux_detect(NULL, NULL))) vga_switcheroo_unregister_client(adev->pdev); @@ -4474,19 +4583,18 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) } amdgpu_fence_driver_hw_init(adev); - r = amdgpu_device_ip_late_init(adev); - if (r) - goto exit; - - queue_delayed_work(system_wq, &adev->delayed_init_work, - msecs_to_jiffies(AMDGPU_RESUME_MS)); - if (!adev->in_s0ix) { r = amdgpu_amdkfd_resume(adev, adev->in_runpm); if (r) goto exit; } + r = amdgpu_device_ip_late_init(adev); + if (r) + goto exit; + + queue_delayed_work(system_wq, &adev->delayed_init_work, + msecs_to_jiffies(AMDGPU_RESUME_MS)); exit: if (amdgpu_sriov_vf(adev)) { amdgpu_virt_init_data_exchange(adev); @@ -5566,10 +5674,6 @@ skip_hw_reset: drm_sched_start(&ring->sched, true); } - if (adev->enable_mes && - amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(11, 0, 3)) - amdgpu_mes_self_test(tmp_adev); - if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) drm_helper_resume_force_mode(adev_to_drm(tmp_adev)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index b6a53e8429b2..0431eafa86b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -99,6 +99,7 @@ MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY); #define mmRCC_CONFIG_MEMSIZE 0xde3 +#define mmMP0_SMN_C2PMSG_33 0x16061 #define mmMM_INDEX 0x0 #define mmMM_INDEX_HI 0x6 #define mmMM_DATA 0x1 @@ -239,8 +240,26 @@ static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, uint8_t *binary) { - uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; - int ret = 0; + uint64_t vram_size; + u32 msg; + int i, ret = 0; + + /* It can take up to a second for IFWI init to complete on some dGPUs, + * but generally it should be in the 60-100ms range. Normally this starts + * as soon as the device gets power so by the time the OS loads this has long + * completed. However, when a card is hotplugged via e.g., USB4, we need to + * wait for this to complete. Once the C2PMSG is updated, we can + * continue. 
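+ * The loop below treats bit 31 of mmMP0_SMN_C2PMSG_33 as the
+ * init-done flag and polls it for at most ~1s (1000 * 1 ms).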
+ */ + if (dev_is_removable(&adev->pdev->dev)) { + for (i = 0; i < 1000; i++) { + msg = RREG32(mmMP0_SMN_C2PMSG_33); + if (msg & 0x80000000) + break; + msleep(1); + } + } + vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; if (vram_size) { uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET; @@ -2449,6 +2468,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(4, 8, 0)) adev->gmc.xgmi.supported = true; + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) + adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 4, 0); + /* set NBIO version */ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(6, 1, 0): diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index b5e28fa3f414..e7e87a3b2601 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -409,7 +409,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) if (!r) r = amdgpu_vm_clear_freed(adev, vm, NULL); if (!r) - r = amdgpu_vm_handle_moved(adev, vm); + r = amdgpu_vm_handle_moved(adev, vm, ticket); if (r && r != -EBUSY) DRM_ERROR("Failed to invalidate VM page tables (%d))\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 6cc6e3991410..3095a3a864af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2041,6 +2041,14 @@ static const struct pci_device_id pciidlist[] = { MODULE_DEVICE_TABLE(pci, pciidlist); +static const struct amdgpu_asic_type_quirk asic_type_quirks[] = { + /* differentiate between P10 and P11 asics with the same DID */ + {0x67FF, 0xE3, CHIP_POLARIS10}, + {0x67FF, 0xE7, CHIP_POLARIS10}, + {0x67FF, 0xF3, CHIP_POLARIS10}, + {0x67FF, 0xF7, CHIP_POLARIS10}, +}; + static const struct drm_driver amdgpu_kms_driver; static void amdgpu_get_secondary_funcs(struct amdgpu_device *adev) @@ -2083,6 +2091,22 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev) } } +static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(asic_type_quirks); i++) { + if (pdev->device == asic_type_quirks[i].device && + pdev->revision == asic_type_quirks[i].revision) { + flags &= ~AMD_ASIC_MASK; + flags |= asic_type_quirks[i].type; + break; + } + } + + return flags; +} + static int amdgpu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -2110,15 +2134,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, "See modparam exp_hw_support\n"); return -ENODEV; } - /* differentiate between P10 and P11 asics with the same DID */ - if (pdev->device == 0x67FF && - (pdev->revision == 0xE3 || - pdev->revision == 0xE7 || - pdev->revision == 0xF3 || - pdev->revision == 0xF7)) { - flags &= ~AMD_ASIC_MASK; - flags |= CHIP_POLARIS10; - } + + flags = amdgpu_fix_asic_type(pdev, flags); /* Due to hardware bugs, S/G Display on raven requires a 1:1 IOMMU mapping, * however, SME requires an indirect IOMMU mapping because the encryption diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index c92e0aba69e1..b9674c57c436 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -385,9 +385,11 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, struct amdgpu_ring *ring = &kiq->ring; u32 domain = AMDGPU_GEM_DOMAIN_GTT; +#if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64) /* Only 
enable on gfx10 and 11 for now to avoid changing behavior on older chips */ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0)) domain |= AMDGPU_GEM_DOMAIN_VRAM; +#endif /* create MQD for KIQ */ if (!adev->enable_mes_kiq && !ring->mqd_obj) { @@ -929,12 +931,12 @@ void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev, func(adev, ras_error_status, i); } -uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id) { signed long r, cnt = 0; unsigned long flags; uint32_t seq, reg_val_offs = 0, value = 0; - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; struct amdgpu_ring *ring = &kiq->ring; if (amdgpu_device_skip_hw_access(adev)) @@ -997,12 +999,12 @@ failed_kiq_read: return ~0; } -void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id) { signed long r, cnt = 0; unsigned long flags; uint32_t seq; - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; struct amdgpu_ring *ring = &kiq->ring; BUG_ON(!ring->funcs->emit_wreg); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 7088c5015675..f23bafec71c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -521,8 +521,8 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); -uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); -void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id); +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id); int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev); void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 2dce338b0f1e..5f71414190e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -826,7 +826,10 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) gc_ver == IP_VERSION(9, 4, 3) || gc_ver >= IP_VERSION(10, 3, 0)); - gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : amdgpu_noretry; + if (!amdgpu_sriov_xnack_support(adev)) + gmc->noretry = 1; + else + gmc->noretry = (amdgpu_noretry == -1) ? 
noretry_default : amdgpu_noretry; } void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 5a828c175e3a..cf33eb219e25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -143,6 +143,46 @@ int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev) return 0; } +void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set) +{ + if (!mca_set) + return; + + memset(mca_set, 0, sizeof(*mca_set)); + INIT_LIST_HEAD(&mca_set->list); +} + +int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry) +{ + struct mca_bank_node *node; + + if (!entry) + return -EINVAL; + + node = kvzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + memcpy(&node->entry, entry, sizeof(*entry)); + + INIT_LIST_HEAD(&node->node); + list_add_tail(&node->node, &mca_set->list); + + mca_set->nr_entries++; + + return 0; +} + +void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set) +{ + struct mca_bank_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, &mca_set->list, node) { + list_del(&node->node); + kvfree(node); + } +} + void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs) { struct amdgpu_mca *mca = &adev->mca; @@ -160,6 +200,65 @@ int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) return -EOPNOTSUPP; } +static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry) +{ + dev_info(adev->dev, "[Hardware error] Accelerator Check Architecture events logged\n"); + dev_info(adev->dev, "[Hardware error] aca entry[%02d].STATUS=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_STATUS]); + dev_info(adev->dev, "[Hardware error] aca entry[%02d].ADDR=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_ADDR]); + dev_info(adev->dev, "[Hardware error] aca entry[%02d].MISC0=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_MISC0]); + dev_info(adev->dev, "[Hardware error] aca entry[%02d].IPID=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_IPID]); + dev_info(adev->dev, "[Hardware error] aca entry[%02d].SYND=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_SYND]); +} + +int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data) +{ + struct amdgpu_smuio_mcm_config_info mcm_info; + struct mca_bank_set mca_set; + struct mca_bank_node *node; + struct mca_bank_entry *entry; + uint32_t count; + int ret, i = 0; + + amdgpu_mca_bank_set_init(&mca_set); + + ret = amdgpu_mca_smu_get_mca_set(adev, blk, type, &mca_set); + if (ret) + goto out_mca_release; + + list_for_each_entry(node, &mca_set.list, node) { + entry = &node->entry; + + amdgpu_mca_smu_mca_bank_dump(adev, i++, entry); + + count = 0; + ret = amdgpu_mca_smu_parse_mca_error_count(adev, blk, type, entry, &count); + if (ret) + goto out_mca_release; + + if (!count) + continue; + + mcm_info.socket_id = entry->info.socket_id; + mcm_info.die_id = entry->info.aid; + + if (type == AMDGPU_MCA_ERROR_TYPE_UE) + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, (uint64_t)count); + else + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, (uint64_t)count); + } + +out_mca_release: + amdgpu_mca_bank_set_release(&mca_set); + + return ret; +} + + int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count) { const struct 
amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; @@ -173,17 +272,77 @@ int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_m return -EOPNOTSUPP; } -int amdgpu_mca_smu_get_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, uint32_t *count) +int amdgpu_mca_smu_get_mca_set_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, uint32_t *total) { const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; - if (!count) + struct mca_bank_set mca_set; + struct mca_bank_node *node; + struct mca_bank_entry *entry; + uint32_t count; + int ret; + + if (!total) return -EINVAL; - if (mca_funcs && mca_funcs->mca_get_error_count) - return mca_funcs->mca_get_error_count(adev, blk, type, count); + if (!mca_funcs) + return -EOPNOTSUPP; - return -EOPNOTSUPP; + if (!mca_funcs->mca_get_ras_mca_set || !mca_funcs->mca_get_valid_mca_count) + return -EOPNOTSUPP; + + amdgpu_mca_bank_set_init(&mca_set); + + ret = mca_funcs->mca_get_ras_mca_set(adev, blk, type, &mca_set); + if (ret) + goto err_mca_set_release; + + *total = 0; + list_for_each_entry(node, &mca_set.list, node) { + entry = &node->entry; + + count = 0; + ret = mca_funcs->mca_parse_mca_error_count(adev, blk, type, entry, &count); + if (ret) + goto err_mca_set_release; + + *total += count; + } + +err_mca_set_release: + amdgpu_mca_bank_set_release(&mca_set); + + return ret; +} + +int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) +{ + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + if (!count || !entry) + return -EINVAL; + + if (!mca_funcs || !mca_funcs->mca_parse_mca_error_count) + return -EOPNOTSUPP; + + + return mca_funcs->mca_parse_mca_error_count(adev, blk, type, entry, count); +} + +int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set) +{ + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + + if (!mca_set) + return -EINVAL; + + if (!mca_funcs || !mca_funcs->mca_get_ras_mca_set) + return -EOPNOTSUPP; + + WARN_ON(!list_empty(&mca_set->list)); + + return mca_funcs->mca_get_ras_mca_set(adev, blk, type, mca_set); } int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, @@ -230,14 +389,21 @@ static int amdgpu_mca_smu_debug_mode_set(void *data, u64 val) static void mca_dump_entry(struct seq_file *m, struct mca_bank_entry *entry) { int i, idx = entry->idx; + int reg_idx_array[] = { + MCA_REG_IDX_STATUS, + MCA_REG_IDX_ADDR, + MCA_REG_IDX_MISC0, + MCA_REG_IDX_IPID, + MCA_REG_IDX_SYND, + }; seq_printf(m, "mca entry[%d].type: %s\n", idx, entry->type == AMDGPU_MCA_ERROR_TYPE_UE ? 
"UE" : "CE"); seq_printf(m, "mca entry[%d].ip: %d\n", idx, entry->ip); seq_printf(m, "mca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n", idx, entry->info.socket_id, entry->info.aid, entry->info.hwid, entry->info.mcatype); - for (i = 0; i < ARRAY_SIZE(entry->regs); i++) - seq_printf(m, "mca entry[%d].regs[%d]: 0x%016llx\n", idx, i, entry->regs[i]); + for (i = 0; i < ARRAY_SIZE(reg_idx_array); i++) + seq_printf(m, "mca entry[%d].regs[%d]: 0x%016llx\n", idx, reg_idx_array[i], entry->regs[reg_idx_array[i]]); } static int mca_dump_show(struct seq_file *m, enum amdgpu_mca_error_type type) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h index 28ad463cf5c9..2b488fcf2f95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -25,6 +25,27 @@ #define MCA_MAX_REGS_COUNT (16) +#define MCA_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> l) +#define MCA_REG__STATUS__VAL(x) MCA_REG_FIELD(x, 63, 63) +#define MCA_REG__STATUS__OVERFLOW(x) MCA_REG_FIELD(x, 62, 62) +#define MCA_REG__STATUS__UC(x) MCA_REG_FIELD(x, 61, 61) +#define MCA_REG__STATUS__EN(x) MCA_REG_FIELD(x, 60, 60) +#define MCA_REG__STATUS__MISCV(x) MCA_REG_FIELD(x, 59, 59) +#define MCA_REG__STATUS__ADDRV(x) MCA_REG_FIELD(x, 58, 58) +#define MCA_REG__STATUS__PCC(x) MCA_REG_FIELD(x, 57, 57) +#define MCA_REG__STATUS__ERRCOREIDVAL(x) MCA_REG_FIELD(x, 56, 56) +#define MCA_REG__STATUS__TCC(x) MCA_REG_FIELD(x, 55, 55) +#define MCA_REG__STATUS__SYNDV(x) MCA_REG_FIELD(x, 53, 53) +#define MCA_REG__STATUS__CECC(x) MCA_REG_FIELD(x, 46, 46) +#define MCA_REG__STATUS__UECC(x) MCA_REG_FIELD(x, 45, 45) +#define MCA_REG__STATUS__DEFERRED(x) MCA_REG_FIELD(x, 44, 44) +#define MCA_REG__STATUS__POISON(x) MCA_REG_FIELD(x, 43, 43) +#define MCA_REG__STATUS__SCRUB(x) MCA_REG_FIELD(x, 40, 40) +#define MCA_REG__STATUS__ERRCOREID(x) MCA_REG_FIELD(x, 37, 32) +#define MCA_REG__STATUS__ADDRLSB(x) MCA_REG_FIELD(x, 29, 24) +#define MCA_REG__STATUS__ERRORCODEEXT(x) MCA_REG_FIELD(x, 21, 16) +#define MCA_REG__STATUS__ERRORCODE(x) MCA_REG_FIELD(x, 15, 0) + enum amdgpu_mca_ip { AMDGPU_MCA_IP_UNKNOW = -1, AMDGPU_MCA_IP_PSP = 0, @@ -33,6 +54,7 @@ enum amdgpu_mca_ip { AMDGPU_MCA_IP_SMU, AMDGPU_MCA_IP_MP5, AMDGPU_MCA_IP_UMC, + AMDGPU_MCA_IP_PCS_XGMI, AMDGPU_MCA_IP_COUNT, }; @@ -57,6 +79,15 @@ struct amdgpu_mca { const struct amdgpu_mca_smu_funcs *mca_funcs; }; +enum mca_reg_idx { + MCA_REG_IDX_STATUS = 1, + MCA_REG_IDX_ADDR = 2, + MCA_REG_IDX_MISC0 = 3, + MCA_REG_IDX_IPID = 5, + MCA_REG_IDX_SYND = 6, + MCA_REG_IDX_COUNT = 16, +}; + struct mca_bank_info { int socket_id; int aid; @@ -72,18 +103,28 @@ struct mca_bank_entry { uint64_t regs[MCA_MAX_REGS_COUNT]; }; +struct mca_bank_node { + struct mca_bank_entry entry; + struct list_head node; +}; + +struct mca_bank_set { + int nr_entries; + struct list_head list; +}; + struct amdgpu_mca_smu_funcs { int max_ue_count; int max_ce_count; int (*mca_set_debug_mode)(struct amdgpu_device *adev, bool enable); - int (*mca_get_error_count)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, uint32_t *count); + int (*mca_get_ras_mca_set)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, + struct mca_bank_set *mca_set); + int (*mca_parse_mca_error_count)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, + struct mca_bank_entry *entry, uint32_t *count); int (*mca_get_valid_mca_count)(struct amdgpu_device *adev, enum 
amdgpu_mca_error_type type, uint32_t *count); int (*mca_get_mca_entry)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, int idx, struct mca_bank_entry *entry); - int (*mca_get_ras_mca_idx_array)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, int *idx_array, int *idx_array_size); }; void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev, @@ -107,11 +148,22 @@ int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev); void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs); int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable); int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count); +int amdgpu_mca_smu_get_mca_set_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, uint32_t *total); int amdgpu_mca_smu_get_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, uint32_t *count); +int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count); +int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set); int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, int idx, struct mca_bank_entry *entry); void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root); +void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set); +int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry); +void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set); +int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 59f10b353b3a..9ddbf1494326 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -557,8 +557,20 @@ static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev, mqd_prop.hqd_queue_priority = p->hqd_queue_priority; mqd_prop.hqd_active = false; + if (p->queue_type == AMDGPU_RING_TYPE_GFX || + p->queue_type == AMDGPU_RING_TYPE_COMPUTE) { + mutex_lock(&adev->srbm_mutex); + amdgpu_gfx_select_me_pipe_q(adev, p->ring->me, p->ring->pipe, 0, 0, 0); + } + mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop); + if (p->queue_type == AMDGPU_RING_TYPE_GFX || + p->queue_type == AMDGPU_RING_TYPE_COMPUTE) { + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } + amdgpu_bo_unreserve(q->mqd_obj); } @@ -994,9 +1006,13 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, switch (queue_type) { case AMDGPU_RING_TYPE_GFX: ring->funcs = adev->gfx.gfx_ring[0].funcs; + ring->me = adev->gfx.gfx_ring[0].me; + ring->pipe = adev->gfx.gfx_ring[0].pipe; break; case AMDGPU_RING_TYPE_COMPUTE: ring->funcs = adev->gfx.compute_ring[0].funcs; + ring->me = adev->gfx.compute_ring[0].me; + ring->pipe = adev->gfx.compute_ring[0].pipe; break; case AMDGPU_RING_TYPE_SDMA: ring->funcs = adev->sdma.instance[0].ring.funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 0dcb6c36b02c..cef920a93924 
100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1062,9 +1062,6 @@ static const char * const amdgpu_vram_names[] = { */ int amdgpu_bo_init(struct amdgpu_device *adev) { - /* set the default AGP aperture state */ - amdgpu_gmc_set_agp_default(adev, &adev->gmc); - /* On A+A platform, VRAM can be mapped as WB */ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) { /* reserve PAT memory space to WC for VRAM */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 648bd5e12830..32b701cc0376 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2120,6 +2120,21 @@ int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev) return ret; } +int amdgpu_psp_query_boot_status(struct amdgpu_device *adev) +{ + struct psp_context *psp = &adev->psp; + int ret = 0; + + if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU)) + return 0; + + if (psp->funcs && + psp->funcs->query_boot_status) + ret = psp->funcs->query_boot_status(psp); + + return ret; +} + static int psp_hw_start(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 7111dd32e66f..5d36ad3f48c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -134,6 +134,7 @@ struct psp_funcs { int (*update_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr); int (*vbflash_stat)(struct psp_context *psp); int (*fatal_error_recovery_quirk)(struct psp_context *psp); + int (*query_boot_status)(struct psp_context *psp); }; struct ta_funcs { @@ -537,4 +538,6 @@ int is_psp_fw_valid(struct psp_bin_desc bin); int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev); +int amdgpu_psp_query_boot_status(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 303fbb6a48b6..84e5987b14e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1165,13 +1165,53 @@ static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, s } } -/* query/inject/cure begin */ -int amdgpu_ras_query_error_status(struct amdgpu_device *adev, - struct ras_query_if *info) +static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, + struct ras_query_if *info, + struct ras_err_data *err_data, + unsigned int error_query_mode) { + enum amdgpu_ras_block blk = info ? 
info->head.block : AMDGPU_RAS_BLOCK_COUNT; struct amdgpu_ras_block_object *block_obj = NULL; + + if (error_query_mode == AMDGPU_RAS_INVALID_ERROR_QUERY) + return -EINVAL; + + if (error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) { + if (info->head.block == AMDGPU_RAS_BLOCK__UMC) { + amdgpu_ras_get_ecc_info(adev, err_data); + } else { + block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0); + if (!block_obj || !block_obj->hw_ops) { + dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", + get_ras_block_str(&info->head)); + return -EINVAL; + } + + if (block_obj->hw_ops->query_ras_error_count) + block_obj->hw_ops->query_ras_error_count(adev, &err_data); + + if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) || + (info->head.block == AMDGPU_RAS_BLOCK__GFX) || + (info->head.block == AMDGPU_RAS_BLOCK__MMHUB)) { + if (block_obj->hw_ops->query_ras_error_status) + block_obj->hw_ops->query_ras_error_status(adev); + } + } + } else { + /* FIXME: add code to check return value later */ + amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data); + amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data); + } + + return 0; +} + +/* query/inject/cure begin */ +int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info) +{ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); struct ras_err_data err_data; + unsigned int error_query_mode; int ret; if (!obj) @@ -1181,27 +1221,14 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, if (ret) return ret; - if (info->head.block == AMDGPU_RAS_BLOCK__UMC) { - amdgpu_ras_get_ecc_info(adev, &err_data); - } else { - block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0); - if (!block_obj || !block_obj->hw_ops) { - dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", - get_ras_block_str(&info->head)); - ret = -EINVAL; - goto out_fini_err_data; - } - - if (block_obj->hw_ops->query_ras_error_count) - block_obj->hw_ops->query_ras_error_count(adev, &err_data); + if (!amdgpu_ras_get_error_query_mode(adev, &error_query_mode)) + return -EINVAL; - if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) || - (info->head.block == AMDGPU_RAS_BLOCK__GFX) || - (info->head.block == AMDGPU_RAS_BLOCK__MMHUB)) { - if (block_obj->hw_ops->query_ras_error_status) - block_obj->hw_ops->query_ras_error_status(adev); - } - } + ret = amdgpu_ras_query_error_status_helper(adev, info, + &err_data, + error_query_mode); + if (ret) + goto out_fini_err_data; amdgpu_rasmgr_error_data_statistic_update(obj, &err_data); @@ -1222,6 +1249,8 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0); struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + struct amdgpu_hive_info *hive; + int hive_ras_recovery = 0; if (!block_obj || !block_obj->hw_ops) { dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", @@ -1229,15 +1258,22 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, return -EOPNOTSUPP; } - /* skip ras error reset in gpu reset */ - if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery)) && - mca_funcs && mca_funcs->mca_set_debug_mode) - return -EOPNOTSUPP; - if (!amdgpu_ras_is_supported(adev, block) || !amdgpu_ras_get_mca_debug_mode(adev)) return -EOPNOTSUPP; + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + hive_ras_recovery = atomic_read(&hive->ras_recovery); + amdgpu_put_xgmi_hive(hive); + } + + 
/* skip ras error reset in gpu reset */ + if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery) || + hive_ras_recovery) && + mca_funcs && mca_funcs->mca_set_debug_mode) + return -EOPNOTSUPP; + if (block_obj->hw_ops->reset_ras_error_count) block_obj->hw_ops->reset_ras_error_count(adev); @@ -1528,7 +1564,8 @@ static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - sysfs_remove_file_from_group(&adev->dev->kobj, + if (adev->dev->kobj.sd) + sysfs_remove_file_from_group(&adev->dev->kobj, &con->badpages_attr.attr, RAS_FS_NAME); } @@ -1547,7 +1584,8 @@ static int amdgpu_ras_sysfs_remove_dev_attr_node(struct amdgpu_device *adev) .attrs = attrs, }; - sysfs_remove_group(&adev->dev->kobj, &group); + if (adev->dev->kobj.sd) + sysfs_remove_group(&adev->dev->kobj, &group); return 0; } @@ -1594,7 +1632,8 @@ int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev, if (!obj || !obj->attr_inuse) return -EINVAL; - sysfs_remove_file_from_group(&adev->dev->kobj, + if (adev->dev->kobj.sd) + sysfs_remove_file_from_group(&adev->dev->kobj, &obj->sysfs_attr.attr, RAS_FS_NAME); obj->attr_inuse = 0; @@ -3388,6 +3427,26 @@ bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev) return true; } +bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev, + unsigned int *error_query_mode) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + + if (!con) { + *error_query_mode = AMDGPU_RAS_INVALID_ERROR_QUERY; + return false; + } + + if (mca_funcs && mca_funcs->mca_set_debug_mode) + *error_query_mode = + (con->is_mca_debug_mode) ? AMDGPU_RAS_DIRECT_ERROR_QUERY : AMDGPU_RAS_FIRMWARE_ERROR_QUERY; + else + *error_query_mode = AMDGPU_RAS_DIRECT_ERROR_QUERY; + + return true; +} + /* Register each ip ras block into amdgpu ras */ int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct amdgpu_ras_block_object *ras_block_obj) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 665414c22ca9..19161916ac46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -320,6 +320,12 @@ enum amdgpu_ras_ret { AMDGPU_RAS_PT, }; +enum amdgpu_ras_error_query_mode { + AMDGPU_RAS_INVALID_ERROR_QUERY = 0, + AMDGPU_RAS_DIRECT_ERROR_QUERY = 1, + AMDGPU_RAS_FIRMWARE_ERROR_QUERY = 2, +}; + /* ras error status reisger fields */ #define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG__SHIFT 0x0 #define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG_MASK 0x00000001L @@ -769,6 +775,8 @@ int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_co void amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable); bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev); +bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev, + unsigned int *mode); int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct amdgpu_ras_block_object *ras_block_obj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index f74347cc087a..d65e21914d8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -166,8 +166,12 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, } } - if (reset) + if (reset) { + /* use mode-2 reset for poison consumption */ + if (!entry) + con->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET; amdgpu_ras_reset_gpu(adev); + } } 
kfree(err_data->err_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 815b7c34ed33..65949cc7abb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -399,20 +399,20 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * + * Initialize the entity used for handle management in the kernel driver. */ -int amdgpu_uvd_entity_init(struct amdgpu_device *adev) +int amdgpu_uvd_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - struct amdgpu_ring *ring; - struct drm_gpu_scheduler *sched; - int r; + if (ring == &adev->uvd.inst[0].ring) { + struct drm_gpu_scheduler *sched = &ring->sched; + int r; - ring = &adev->uvd.inst[0].ring; - sched = &ring->sched; - r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL, - &sched, 1, NULL); - if (r) { - DRM_ERROR("Failed setting up UVD kernel entity.\n"); - return r; + r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, NULL); + if (r) { + DRM_ERROR("Failed setting up UVD kernel entity.\n"); + return r; + } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index a9f342537c68..9dfad2f48ef4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h @@ -73,7 +73,7 @@ struct amdgpu_uvd { int amdgpu_uvd_sw_init(struct amdgpu_device *adev); int amdgpu_uvd_sw_fini(struct amdgpu_device *adev); -int amdgpu_uvd_entity_init(struct amdgpu_device *adev); +int amdgpu_uvd_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring); int amdgpu_uvd_prepare_suspend(struct amdgpu_device *adev); int amdgpu_uvd_suspend(struct amdgpu_device *adev); int amdgpu_uvd_resume(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 1904edf68407..0954447f689d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -231,20 +231,20 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * + * Initialize the entity used for handle management in the kernel driver. 
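+ *
+ * Note: only the call for the first VCE ring (&adev->vce.ring[0])
+ * creates the entity; calls for any other ring are a no-op and return 0.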
*/ -int amdgpu_vce_entity_init(struct amdgpu_device *adev) +int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - struct amdgpu_ring *ring; - struct drm_gpu_scheduler *sched; - int r; - - ring = &adev->vce.ring[0]; - sched = &ring->sched; - r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL, - &sched, 1, NULL); - if (r != 0) { - DRM_ERROR("Failed setting up VCE run queue.\n"); - return r; + if (ring == &adev->vce.ring[0]) { + struct drm_gpu_scheduler *sched = &ring->sched; + int r; + + r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, NULL); + if (r != 0) { + DRM_ERROR("Failed setting up VCE run queue.\n"); + return r; + } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index ea680fc9a6c3..6e53f872d084 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -55,7 +55,7 @@ struct amdgpu_vce { int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size); int amdgpu_vce_sw_fini(struct amdgpu_device *adev); -int amdgpu_vce_entity_init(struct amdgpu_device *adev); +int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring); int amdgpu_vce_suspend(struct amdgpu_device *adev); int amdgpu_vce_resume(struct amdgpu_device *adev); void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index a0aa624f5a92..3a632c3b1a2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -73,9 +73,10 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t reg1, - uint32_t ref, uint32_t mask) + uint32_t ref, uint32_t mask, + uint32_t xcc_inst) { - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_inst]; struct amdgpu_ring *ring = &kiq->ring; signed long r, cnt = 0; unsigned long flags; @@ -942,7 +943,7 @@ void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev, } } -static bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, +bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, u32 acc_flags, u32 hwip, bool write, u32 *rlcg_flag) { @@ -975,7 +976,7 @@ static bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, return ret; } -static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id) +u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id) { struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; uint32_t timeout = 50000; @@ -1093,3 +1094,13 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, else return RREG32(offset); } + +bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev) +{ + bool xnack_mode = true; + + if (amdgpu_sriov_vf(adev) && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) + xnack_mode = false; + + return xnack_mode; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 858ef21ae515..d4207e44141f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -334,7 +334,8 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); void amdgpu_virt_kiq_reg_write_reg_wait(struct 
amdgpu_device *adev, uint32_t reg0, uint32_t rreg1, - uint32_t ref, uint32_t mask); + uint32_t ref, uint32_t mask, + uint32_t xcc_inst); int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); @@ -365,4 +366,9 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id); void amdgpu_virt_post_reset(struct amdgpu_device *adev); +bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev); +bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, + u32 acc_flags, u32 hwip, + bool write, u32 *rlcg_flag); +u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 3cd5977c0709..d1b8afd105c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1098,8 +1098,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bo = gem_to_amdgpu_bo(gobj); } mem = bo->tbo.resource; - if (mem->mem_type == TTM_PL_TT || - mem->mem_type == AMDGPU_PL_PREEMPT) + if (mem && (mem->mem_type == TTM_PL_TT || + mem->mem_type == AMDGPU_PL_PREEMPT)) pages_addr = bo->tbo.ttm->dma_address; } @@ -1373,6 +1373,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vm: requested vm + * @ticket: optional reservation ticket used to reserve the VM * * Make sure all BOs which are moved are updated in the PTs. * @@ -1382,11 +1383,12 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, * PTs have to be reserved! */ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, - struct amdgpu_vm *vm) + struct amdgpu_vm *vm, + struct ww_acquire_ctx *ticket) { struct amdgpu_bo_va *bo_va; struct dma_resv *resv; - bool clear; + bool clear, unlock; int r; spin_lock(&vm->status_lock); @@ -1409,17 +1411,24 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, spin_unlock(&vm->status_lock); /* Try to reserve the BO to avoid clearing its ptes */ - if (!adev->debug_vm && dma_resv_trylock(resv)) + if (!adev->debug_vm && dma_resv_trylock(resv)) { clear = false; + unlock = true; + /* The caller is already holding the reservation lock */ + } else if (ticket && dma_resv_locking_ctx(resv) == ticket) { + clear = false; + unlock = false; /* Somebody else is using the BO right now */ - else + } else { clear = true; + unlock = false; + } r = amdgpu_vm_bo_update(adev, bo_va, clear); if (r) return r; - if (!clear) + if (unlock) dma_resv_unlock(resv); spin_lock(&vm->status_lock); } @@ -2130,7 +2139,8 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) * Returns: * 0 for success, error for failure. 
*/ -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id) +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int32_t xcp_id) { struct amdgpu_bo *root_bo; struct amdgpu_bo_vm *root; @@ -2149,6 +2159,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp INIT_LIST_HEAD(&vm->done); INIT_LIST_HEAD(&vm->pt_freed); INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work); + INIT_KFIFO(vm->faults); r = amdgpu_vm_init_entities(adev, vm); if (r) @@ -2183,34 +2194,33 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp false, &root, xcp_id); if (r) goto error_free_delayed; - root_bo = &root->bo; + + root_bo = amdgpu_bo_ref(&root->bo); r = amdgpu_bo_reserve(root_bo, true); - if (r) - goto error_free_root; + if (r) { + amdgpu_bo_unref(&root->shadow); + amdgpu_bo_unref(&root_bo); + goto error_free_delayed; + } + amdgpu_vm_bo_base_init(&vm->root, vm, root_bo); r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1); if (r) - goto error_unreserve; - - amdgpu_vm_bo_base_init(&vm->root, vm, root_bo); + goto error_free_root; r = amdgpu_vm_pt_clear(adev, vm, root, false); if (r) - goto error_unreserve; + goto error_free_root; amdgpu_bo_unreserve(vm->root.bo); - - INIT_KFIFO(vm->faults); + amdgpu_bo_unref(&root_bo); return 0; -error_unreserve: - amdgpu_bo_unreserve(vm->root.bo); - error_free_root: - amdgpu_bo_unref(&root->shadow); + amdgpu_vm_pt_free_root(adev, vm); + amdgpu_bo_unreserve(vm->root.bo); amdgpu_bo_unref(&root_bo); - vm->root.bo = NULL; error_free_delayed: dma_fence_put(vm->last_tlb_flush); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 9c7b5d33b56e..2cd86d2bf73f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -443,7 +443,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct dma_fence **fence); int amdgpu_vm_handle_moved(struct amdgpu_device *adev, - struct amdgpu_vm *vm); + struct amdgpu_vm *vm, + struct ww_acquire_ctx *ticket); void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, struct amdgpu_vm *vm, struct amdgpu_bo *bo); int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 18f58efc9dc7..08916538a615 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -77,7 +77,16 @@ static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head) return true; } +static inline u64 amdgpu_vram_mgr_blocks_size(struct list_head *head) +{ + struct drm_buddy_block *block; + u64 size = 0; + list_for_each_entry(block, head, link) + size += amdgpu_vram_mgr_block_size(block); + + return size; +} /** * DOC: mem_info_vram_total @@ -516,6 +525,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, mutex_unlock(&mgr->lock); vres->base.start = 0; + size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks), + vres->base.size); list_for_each_entry(block, &vres->blocks, link) { unsigned long start; @@ -523,8 +534,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, amdgpu_vram_mgr_block_size(block); start >>= PAGE_SHIFT; - if (start > PFN_UP(vres->base.size)) - start -= PFN_UP(vres->base.size); + if (start > PFN_UP(size)) + start -= PFN_UP(size); else start = 0; vres->base.start = max(vres->base.start, start); diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 9d5d742ee9d3..bd20cb3b9819 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -103,6 +103,53 @@ static const int walf_pcs_err_noncorrectable_mask_reg_aldebaran[] = { smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000 }; +static const int xgmi3x16_pcs_err_status_reg_v6_4[] = { + smnPCS_XGMI3X16_PCS_ERROR_STATUS, + smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x100000 +}; + +static const int xgmi3x16_pcs_err_noncorrectable_mask_reg_v6_4[] = { + smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK, + smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000 +}; + +static const u64 xgmi_v6_4_0_mca_base_array[] = { + 0x11a09200, + 0x11b09200, +}; + +static const char *xgmi_v6_4_0_ras_error_code_ext[32] = { + [0x00] = "XGMI PCS DataLossErr", + [0x01] = "XGMI PCS TrainingErr", + [0x02] = "XGMI PCS FlowCtrlAckErr", + [0x03] = "XGMI PCS RxFifoUnderflowErr", + [0x04] = "XGMI PCS RxFifoOverflowErr", + [0x05] = "XGMI PCS CRCErr", + [0x06] = "XGMI PCS BERExceededErr", + [0x07] = "XGMI PCS TxMetaDataErr", + [0x08] = "XGMI PCS ReplayBufParityErr", + [0x09] = "XGMI PCS DataParityErr", + [0x0a] = "XGMI PCS ReplayFifoOverflowErr", + [0x0b] = "XGMI PCS ReplayFifoUnderflowErr", + [0x0c] = "XGMI PCS ElasticFifoOverflowErr", + [0x0d] = "XGMI PCS DeskewErr", + [0x0e] = "XGMI PCS FlowCtrlCRCErr", + [0x0f] = "XGMI PCS DataStartupLimitErr", + [0x10] = "XGMI PCS FCInitTimeoutErr", + [0x11] = "XGMI PCS RecoveryTimeoutErr", + [0x12] = "XGMI PCS ReadySerialTimeoutErr", + [0x13] = "XGMI PCS ReadySerialAttemptErr", + [0x14] = "XGMI PCS RecoveryAttemptErr", + [0x15] = "XGMI PCS RecoveryRelockAttemptErr", + [0x16] = "XGMI PCS ReplayAttemptErr", + [0x17] = "XGMI PCS SyncHdrErr", + [0x18] = "XGMI PCS TxReplayTimeoutErr", + [0x19] = "XGMI PCS RxReplayTimeoutErr", + [0x1a] = "XGMI PCS LinkSubTxTimeoutErr", + [0x1b] = "XGMI PCS LinkSubRxTimeoutErr", + [0x1c] = "XGMI PCS RxCMDPktErr", +}; + static const struct amdgpu_pcs_ras_field xgmi_pcs_ras_fields[] = { {"XGMI PCS DataLossErr", SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataLossErr)}, @@ -926,7 +973,7 @@ static void pcs_clear_status(struct amdgpu_device *adev, uint32_t pcs_status_reg WREG32_PCIE(pcs_status_reg, 0); } -static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) +static void amdgpu_xgmi_legacy_reset_ras_error_count(struct amdgpu_device *adev) { uint32_t i; @@ -952,6 +999,49 @@ static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) default: break; } + + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++) + pcs_clear_status(adev, + xgmi3x16_pcs_err_status_reg_v6_4[i]); + break; + default: + break; + } +} + +static void __xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst, u64 mca_base) +{ + WREG64_MCA(xgmi_inst, mca_base, MCA_REG_IDX_STATUS, 0ULL); +} + +static void xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(xgmi_v6_4_0_mca_base_array); i++) + __xgmi_v6_4_0_reset_error_count(adev, xgmi_inst, xgmi_v6_4_0_mca_base_array[i]); +} + +static void xgmi_v6_4_0_reset_ras_error_count(struct amdgpu_device *adev) +{ + int i; + + for_each_inst(i, adev->aid_mask) + xgmi_v6_4_0_reset_error_count(adev, i); +} + +static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) +{ + switch (amdgpu_ip_version(adev, 
XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + xgmi_v6_4_0_reset_ras_error_count(adev); + break; + default: + amdgpu_xgmi_legacy_reset_ras_error_count(adev); + break; + } } static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev, @@ -969,7 +1059,9 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev, if (is_xgmi_pcs) { if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == - IP_VERSION(6, 1, 0)) { + IP_VERSION(6, 1, 0) || + amdgpu_ip_version(adev, XGMI_HWIP, 0) == + IP_VERSION(6, 4, 0)) { pcs_ras_fields = &xgmi3x16_pcs_ras_fields[0]; field_array_size = ARRAY_SIZE(xgmi3x16_pcs_ras_fields); } else { @@ -1003,11 +1095,11 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev, return 0; } -static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, - void *ras_error_status) +static void amdgpu_xgmi_legacy_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - int i; + int i, supported = 1; uint32_t data, mask_data = 0; uint32_t ue_cnt = 0, ce_cnt = 0; @@ -1071,7 +1163,25 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, } break; default: - dev_warn(adev->dev, "XGMI RAS error query not supported"); + supported = 0; + break; + } + + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + /* check xgmi3x16 pcs error */ + for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++) { + data = RREG32_PCIE(xgmi3x16_pcs_err_status_reg_v6_4[i]); + mask_data = + RREG32_PCIE(xgmi3x16_pcs_err_noncorrectable_mask_reg_v6_4[i]); + if (data) + amdgpu_xgmi_query_pcs_error_status(adev, data, + mask_data, &ue_cnt, &ce_cnt, true, true); + } + break; + default: + if (!supported) + dev_warn(adev->dev, "XGMI RAS error query not supported"); break; } @@ -1081,32 +1191,116 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, err_data->ce_count += ce_cnt; } +static enum amdgpu_mca_error_type xgmi_v6_4_0_pcs_mca_get_error_type(struct amdgpu_device *adev, u64 status) +{ + const char *error_str; + int ext_error_code; + + ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(status); + + error_str = ext_error_code < ARRAY_SIZE(xgmi_v6_4_0_ras_error_code_ext) ? 
+ xgmi_v6_4_0_ras_error_code_ext[ext_error_code] : NULL; + if (error_str) + dev_info(adev->dev, "%s detected\n", error_str); + + switch (ext_error_code) { + case 0: + return AMDGPU_MCA_ERROR_TYPE_UE; + case 6: + return AMDGPU_MCA_ERROR_TYPE_CE; + default: + return -EINVAL; + } + + return -EINVAL; +} + +static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 mca_base, struct ras_err_data *err_data) +{ + int xgmi_inst = mcm_info->die_id; + u64 status = 0; + + status = RREG64_MCA(xgmi_inst, mca_base, MCA_REG_IDX_STATUS); + if (!MCA_REG__STATUS__VAL(status)) + return; + + switch (xgmi_v6_4_0_pcs_mca_get_error_type(adev, status)) { + case AMDGPU_MCA_ERROR_TYPE_UE: + amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, 1ULL); + break; + case AMDGPU_MCA_ERROR_TYPE_CE: + amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, 1ULL); + break; + default: + break; + } + + WREG64_MCA(xgmi_inst, mca_base, MCA_REG_IDX_STATUS, 0ULL); +} + +static void xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, int xgmi_inst, struct ras_err_data *err_data) +{ + struct amdgpu_smuio_mcm_config_info mcm_info = { + .socket_id = adev->smuio.funcs->get_socket_id(adev), + .die_id = xgmi_inst, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(xgmi_v6_4_0_mca_base_array); i++) + __xgmi_v6_4_0_query_error_count(adev, &mcm_info, xgmi_v6_4_0_mca_base_array[i], err_data); +} + +static void xgmi_v6_4_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + int i; + + for_each_inst(i, adev->aid_mask) + xgmi_v6_4_0_query_error_count(adev, i, err_data); +} + +static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + xgmi_v6_4_0_query_ras_error_count(adev, ras_error_status); + break; + default: + amdgpu_xgmi_legacy_query_ras_error_count(adev, ras_error_status); + break; + } +} + /* Trigger XGMI/WAFL error */ static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, void *inject_if, uint32_t instance_mask) { - int ret = 0; + int ret1, ret2; struct ta_ras_trigger_error_input *block_info = (struct ta_ras_trigger_error_input *)inject_if; if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) dev_warn(adev->dev, "Failed to disallow df cstate"); - if (amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DISALLOW)) + ret1 = amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DISALLOW); + if (ret1 && ret1 != -EOPNOTSUPP) dev_warn(adev->dev, "Failed to disallow XGMI power down"); - ret = psp_ras_trigger_error(&adev->psp, block_info, instance_mask); + ret2 = psp_ras_trigger_error(&adev->psp, block_info, instance_mask); if (amdgpu_ras_intr_triggered()) - return ret; + return ret2; - if (amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DEFAULT)) + ret1 = amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DEFAULT); + if (ret1 && ret1 != -EOPNOTSUPP) dev_warn(adev->dev, "Failed to allow XGMI power down"); if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW)) dev_warn(adev->dev, "Failed to allow df cstate"); - return ret; + return ret2; } struct amdgpu_ras_block_hw_ops xgmi_ras_hw_ops = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index d9ccacd06fba..c8a3bf01743f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3498,6 +3498,8 @@ static void 
gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev, unsigned int vmid); +static int gfx_v10_0_set_powergating_state(void *handle, + enum amd_powergating_state state); static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); @@ -6465,11 +6467,18 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) nv_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.me.mqd_backup[mqd_idx]) - memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); } else { + mutex_lock(&adev->srbm_mutex); + nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1) + gfx_v10_0_cp_gfx_set_doorbell(adev, ring); + + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); /* restore mqd with the backup copy */ if (adev->gfx.me.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset the ring */ ring->wptr = 0; *ring->wptr_cpu_addr = 0; @@ -6743,7 +6752,7 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring) if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.kiq[0].mqd_backup) - memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); /* reset ring buffer */ ring->wptr = 0; @@ -6766,7 +6775,7 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.kiq[0].mqd_backup) - memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); } return 0; @@ -6787,11 +6796,11 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); } else { /* restore MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset ring buffer */ ring->wptr = 0; atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); @@ -7172,6 +7181,13 @@ static int gfx_v10_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + /* Workaround for the Vangogh ASIC: the SMU suspend fails unless power + * gating is set again during gfxoff control, otherwise the subsequent + * request to disallow gfxoff fails. 
+ */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 1)) + gfx_v10_0_set_powergating_state(handle, AMD_PG_STATE_UNGATE); + if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { if (amdgpu_gfx_disable_kgq(adev, 0)) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index fd22943685f7..0c6133cc5e57 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -155,6 +155,7 @@ static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue { amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | + PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */ PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ @@ -3714,11 +3715,11 @@ static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring) soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.me.mqd_backup[mqd_idx]) - memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); } else { /* restore mqd with the backup copy */ if (adev->gfx.me.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset the ring */ ring->wptr = 0; *ring->wptr_cpu_addr = 0; @@ -4007,7 +4008,7 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.kiq[0].mqd_backup) - memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); /* reset ring buffer */ ring->wptr = 0; @@ -4030,7 +4031,7 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.kiq[0].mqd_backup) - memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); } return 0; @@ -4051,11 +4052,11 @@ static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); } else { /* restore MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset ring buffer */ ring->wptr = 0; atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 41bbabd9ad4d..40d06d32bb74 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1102,6 +1102,7 @@ static void gfx_v9_4_3_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX); reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SPARE_INT); } + adev->gfx.rlc.rlcg_reg_access_supported = true; } static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev) @@ -2738,16 +2739,16 @@ static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( 
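	/* Note: the _XCC register helpers used below take an explicit xcc_id and
	 * route the access to that XCC's register space (including the RLC-assisted
	 * path under SR-IOV); the plain RREG32/WREG32 helpers always address
	 * instance 0, so the read-modify-write of TIME_STAMP_INT_ENABLE could land
	 * on the wrong XCC. */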
switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id); mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, 0); - WREG32(mec_int_cntl_reg, mec_int_cntl); + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id); break; case AMDGPU_IRQ_STATE_ENABLE: - mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id); mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, 1); - WREG32(mec_int_cntl_reg, mec_int_cntl); + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id); break; default: break; @@ -3799,6 +3800,27 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev, } } + /* handle extra register entries of UE */ + for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) { + for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) { + for (k = 0; k < gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) { + /* no need to select if instance number is 1 */ + if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 || + gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1) + gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id); + + amdgpu_ras_inst_query_ras_error_count(adev, + &(gfx_v9_4_3_ue_reg_list[i].reg_entry), + 1, + gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].mem_id_ent, + gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].size, + GET_INST(GC, xcc_id), + AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + &ue_count); + } + } + } + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id); mutex_unlock(&adev->grbm_idx_mutex); @@ -3838,6 +3860,23 @@ static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev, } } + /* handle extra register entries of UE */ + for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) { + for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) { + for (k = 0; k < gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) { + /* no need to select if instance number is 1 */ + if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 || + gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1) + gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id); + + amdgpu_ras_inst_reset_ras_error_count(adev, + &(gfx_v9_4_3_ue_reg_list[i].reg_entry), + 1, + GET_INST(GC, xcc_id)); + } + } + } + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id); mutex_unlock(&adev->grbm_idx_mutex); @@ -4300,7 +4339,7 @@ const struct amdgpu_ip_block_version gfx_v9_4_3_ip_block = { .type = AMD_IP_BLOCK_TYPE_GFX, .major = 9, .minor = 4, - .rev = 0, + .rev = 3, .funcs = &gfx_v9_4_3_ip_funcs, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index d8a4fddab9c1..0ec7b061d7c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -268,7 +268,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid); + 1 << vmid, GET_INST(GC, 0)); return; } @@ -672,6 +672,7 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, /* add the xgmi offset of the physical node */ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, &adev->gmc, base); amdgpu_gmc_gart_location(adev, mc, 
AMDGPU_GART_PLACEMENT_BEST_FIT); if (!amdgpu_sriov_vf(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 4713a62ad586..6dce9b29f675 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -229,7 +229,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid); + 1 << vmid, GET_INST(GC, 0)); return; } @@ -637,6 +637,7 @@ static void gmc_v11_0_vram_gtt_location(struct amdgpu_device *adev, base = adev->mmhub.funcs->get_fb_location(adev); + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, &adev->gmc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_HIGH); if (!amdgpu_sriov_vf(adev) || diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 7f66954fd302..42e103d7077d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -211,6 +211,7 @@ static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev, base <<= 24; + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 61ca1a82b651..efc16e580f1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -239,6 +239,7 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev, base <<= 24; + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index fa59749c2aef..ff4ae73d27ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -413,6 +413,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index b66c5f7e1c56..bde25eb4ed8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -817,7 +817,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, uint32_t vmhub, uint32_t flush_type) { bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub); - u32 j, inv_req, tmp, sem, req, ack; + u32 j, inv_req, tmp, sem, req, ack, inst; const unsigned int eng = 17; struct amdgpu_vmhub *hub; @@ -832,13 +832,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for a HW workaround under SRIOV as well * as GFXOFF under bare metal */ - if (adev->gfx.kiq[0].ring.sched.ready && + if (vmhub >= AMDGPU_MMHUB0(0)) + inst = GET_INST(GC, 0); + else + inst = vmhub; + if (adev->gfx.kiq[inst].ring.sched.ready && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng; uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; 
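		/* 'inst' selects which KIQ performs the write/wait pair on our
		 * behalf: MMHUB invalidations are funneled through GC instance 0's
		 * KIQ (see the assignment above), while a GC vmhub index doubles as
		 * its own KIQ instance. */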
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid); + 1 << vmid, inst); return; } @@ -856,9 +860,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, for (j = 0; j < adev->usec_timeout; j++) { /* a read return value of 1 means semaphore acquire */ if (vmhub >= AMDGPU_MMHUB0(0)) - tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem); + tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem, inst); else - tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem); + tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem, inst); if (tmp & 0x1) break; udelay(1); @@ -869,9 +873,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } if (vmhub >= AMDGPU_MMHUB0(0)) - WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req); + WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req, inst); else - WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req); + WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req, inst); /* * Issue a dummy read to wait for the ACK register to @@ -884,9 +888,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, for (j = 0; j < adev->usec_timeout; j++) { if (vmhub >= AMDGPU_MMHUB0(0)) - tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack); + tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack, inst); else - tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack); + tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack, inst); if (tmp & (1 << vmid)) break; udelay(1); @@ -899,9 +903,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * write with 0 means semaphore release */ if (vmhub >= AMDGPU_MMHUB0(0)) - WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0); + WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0, inst); else - WREG32_SOC15_IP_NO_KIQ(GC, sem, 0); + WREG32_SOC15_IP_NO_KIQ(GC, sem, 0, inst); } spin_unlock(&adev->gmc.invalidate_lock); @@ -1176,7 +1180,10 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, if (uncached) { mtype = MTYPE_UC; } else if (ext_coherent) { - mtype = is_local ? MTYPE_CC : MTYPE_UC; + if (adev->rev_id) + mtype = is_local ? MTYPE_CC : MTYPE_UC; + else + mtype = MTYPE_UC; } else if (adev->flags & AMD_IS_APU) { mtype = is_local ? 
mtype_local : MTYPE_NC; } else { @@ -1297,7 +1304,7 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | AMDGPU_PTE_MTYPE_VG10(mtype_local); - } else { + } else if (adev->rev_id) { /* MTYPE_UC case */ *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | AMDGPU_PTE_MTYPE_VG10(MTYPE_CC); @@ -1614,6 +1621,8 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, { u64 base = adev->mmhub.funcs->get_fb_location(adev); + amdgpu_gmc_set_agp_default(adev, mc); + /* add the xgmi offset of the physical node */ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; if (adev->gmc.xgmi.connected_to_cpu) { diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index 3f3a6445c006..49e934975719 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -145,6 +145,10 @@ static void hdp_v4_0_init_registers(struct amdgpu_device *adev) break; } + /* Do not program registers if VF */ + if (amdgpu_sriov_vf(adev)) + return; + WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1); if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 0)) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 9a8ec4d7e333..82b6b62c170b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -654,9 +654,11 @@ static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring) */ static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) { - amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */ + if (!amdgpu_sriov_vf(ring->adev)) { + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */ + } amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0, 0, PACKETJ_TYPE0)); @@ -672,9 +674,11 @@ static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) */ static void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring) { - amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x62a04); + if (!amdgpu_sriov_vf(ring->adev)) { + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x62a04); + } amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0, 0, PACKETJ_TYPE0)); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index e523627cfe25..df218d5ca775 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -28,6 +28,7 @@ #include "nbio/nbio_2_3_offset.h" #include "nbio/nbio_2_3_sh_mask.h" #include <uapi/linux/kfd_ioctl.h> +#include <linux/device.h> #include <linux/pci.h> #define smnPCIE_CONFIG_CNTL 0x11180044 @@ -361,7 +362,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev, data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT; - if (pci_is_thunderbolt_attached(adev->pdev)) + if (dev_is_removable(&adev->pdev->dev)) data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; else data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; @@ -480,7 
+481,7 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) def = data = RREG32_PCIE(smnPCIE_LC_CNTL); data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT; - if (pci_is_thunderbolt_attached(adev->pdev)) + if (dev_is_removable(&adev->pdev->dev)) data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; else data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 4142e2fcd866..3cf4684d0d3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -759,6 +759,83 @@ static int psp_v13_0_fatal_error_recovery_quirk(struct psp_context *psp) return 0; } + +static void psp_v13_0_boot_error_reporting(struct amdgpu_device *adev, + uint32_t inst, + uint32_t boot_error) +{ + uint32_t socket_id; + uint32_t aid_id; + uint32_t hbm_id; + uint32_t reg_data; + + socket_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, SOCKET_ID); + aid_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, AID_ID); + hbm_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, HBM_ID); + + reg_data = RREG32_SOC15(MP0, inst, regMP0_SMN_C2PMSG_109); + dev_info(adev->dev, "socket: %d, aid: %d, firmware boot failed, fw status is 0x%x\n", + socket_id, aid_id, reg_data); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_MEM_TRAINING)) + dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, memory training failed\n", + socket_id, aid_id, hbm_id); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_FW_LOAD)) + dev_info(adev->dev, "socket: %d, aid: %d, firmware load failed at boot time\n", + socket_id, aid_id); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_WAFL_LINK_TRAINING)) + dev_info(adev->dev, "socket: %d, aid: %d, wafl link training failed\n", + socket_id, aid_id); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_XGMI_LINK_TRAINING)) + dev_info(adev->dev, "socket: %d, aid: %d, xgmi link training failed\n", + socket_id, aid_id); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_USR_CP_LINK_TRAINING)) + dev_info(adev->dev, "socket: %d, aid: %d, usr cp link training failed\n", + socket_id, aid_id); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_USR_DP_LINK_TRAINING)) + dev_info(adev->dev, "socket: %d, aid: %d, usr dp link training failed\n", + socket_id, aid_id); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_HBM_MEM_TEST)) + dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm memory test failed\n", + socket_id, aid_id, hbm_id); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_HBM_BIST_TEST)) + dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm bist test failed\n", + socket_id, aid_id, hbm_id); +} + +static int psp_v13_0_query_boot_status(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + int inst_mask = adev->aid_mask; + uint32_t reg_data; + uint32_t i; + int ret = 0; + + if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) + return 0; + + if (RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_59) < 0x00a10007) + return 0; + + for_each_inst(i, inst_mask) { + reg_data = RREG32_SOC15(MP0, i, regMP0_SMN_C2PMSG_126); + if (!REG_GET_FIELD(reg_data, MP0_SMN_C2PMSG_126, BOOT_STATUS)) { + psp_v13_0_boot_error_reporting(adev, i, reg_data); + ret = -EINVAL; + break; + } + } + + return ret; +} + static const struct psp_funcs psp_v13_0_funcs = { .init_microcode = 
psp_v13_0_init_microcode, .wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state, @@ -781,6 +858,7 @@ static const struct psp_funcs psp_v13_0_funcs = { .update_spirom = psp_v13_0_update_spirom, .vbflash_stat = psp_v13_0_vbflash_status, .fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk, + .query_boot_status = psp_v13_0_query_boot_status, }; void psp_v13_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index c46bc6aa4f48..0f24af6f2810 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -427,6 +427,7 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, uint32_t inst_mask) { struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; + u32 doorbell_offset, doorbell; u32 rb_cntl, ib_cntl; int i, unset = 0; @@ -444,6 +445,18 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 0); WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl); + + if (sdma[i]->use_doorbell) { + doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL); + doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET); + + doorbell = REG_SET_FIELD(doorbell, SDMA_GFX_DOORBELL, ENABLE, 0); + doorbell_offset = REG_SET_FIELD(doorbell_offset, + SDMA_GFX_DOORBELL_OFFSET, + OFFSET, 0); + WREG32_SDMA(i, regSDMA_GFX_DOORBELL, doorbell); + WREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET, doorbell_offset); + } } } @@ -631,12 +644,6 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i) rb_cntl = sdma_v4_4_2_rb_cntl(ring, rb_cntl); WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl); - /* Initialize the ring buffer's read and write pointers */ - WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0); - /* set the wb address whether it's enabled or not */ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_ADDR_HI, upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); @@ -654,6 +661,12 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i) /* before programming wptr to a smaller value, minor_ptr_update must be set first */ WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 1); + /* Initialize the ring buffer's read and write pointers */ + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0); + doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL); doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET); @@ -2048,7 +2061,7 @@ const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block = { .type = AMD_IP_BLOCK_TYPE_SDMA, .major = 4, .minor = 4, - .rev = 0, + .rev = 2, .funcs = &sdma_v4_4_2_ip_funcs, }; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index da683afa0222..242b24f73c17 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -69,7 +69,7 @@ #define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP, 0) -#define RREG32_SOC15_IP_NO_KIQ(ip, reg) __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ, ip##_HWIP, 0) +#define RREG32_SOC15_IP_NO_KIQ(ip, reg, inst) __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst) #define RREG32_SOC15_NO_KIQ(ip, inst, reg) \ 
__RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \ @@ -86,8 +86,8 @@ #define WREG32_SOC15_IP(ip, reg, value) \ __WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP, 0) -#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value) \ - __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, 0) +#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value, inst) \ + __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst) #define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \ __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \ @@ -140,7 +140,7 @@ /* for GC only */ #define RREG32_RLC(reg) \ - __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_RLC, GC_HWIP) + __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_RLC, GC_HWIP, 0) #define WREG32_RLC_NO_KIQ(reg, value, hwip) \ __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip, 0) @@ -204,4 +204,10 @@ + adev->asic_funcs->encode_ext_smn_addressing(ext), \ value) \ +#define RREG64_MCA(ext, mca_base, idx) \ + RREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + mca_base + (idx * 8)) + +#define WREG64_MCA(ext, mca_base, idx, val) \ + WREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + mca_base + (idx * 8), val) + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index d5083c549330..48c6efcdeac9 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -381,6 +381,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev) return AMD_RESET_METHOD_MODE1; case IP_VERSION(13, 0, 4): case IP_VERSION(13, 0, 11): + case IP_VERSION(14, 0, 0): return AMD_RESET_METHOD_MODE2; default: if (amdgpu_dpm_is_baco_supported(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 743d2f68b090..e9c2ff74f0bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -88,16 +88,15 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev) umc_v12_0_reset_error_count_per_channel, NULL); } -static bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status) +bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status) { return ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)); } -static bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status) +bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status) { return (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 || diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index 4885b9fff272..b34b1e358f8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -117,6 +117,9 @@ (pa) |= (UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) << UMC_V12_0_PA_CH6_BIT); \ } while (0) +bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status); +bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status); + extern const uint32_t umc_v12_0_channel_idx_tbl[] [UMC_V12_0_UMC_INSTANCE_NUM] diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c 
b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index 58a8f78c003c..a6006f231c65 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -577,8 +577,6 @@ static int uvd_v3_1_sw_init(void *handle) ptr += ucode_len; memcpy(&adev->uvd.keyselect, ptr, 4); - r = amdgpu_uvd_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index d3b1e31f5450..1aa09ad7bbe3 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -127,8 +127,6 @@ static int uvd_v4_2_sw_init(void *handle) if (r) return r; - r = amdgpu_uvd_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 5a8116437abf..f8b229b75435 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -125,8 +125,6 @@ static int uvd_v5_0_sw_init(void *handle) if (r) return r; - r = amdgpu_uvd_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 74c09230aeb3..a9a6880f44e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -432,8 +432,6 @@ static int uvd_v6_0_sw_init(void *handle) } } - r = amdgpu_uvd_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 1c42cf10cc29..6068b784dc69 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -480,10 +480,6 @@ static int uvd_v7_0_sw_init(void *handle) if (r) return r; - r = amdgpu_uvd_entity_init(adev); - if (r) - return r; - r = amdgpu_virt_alloc_mm_table(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index 67eb01fef789..a08e7abca423 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -441,8 +441,6 @@ static int vce_v2_0_sw_init(void *handle) return r; } - r = amdgpu_vce_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 18f6e62af339..f4760748d349 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -450,8 +450,6 @@ static int vce_v3_0_sw_init(void *handle) return r; } - r = amdgpu_vce_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index e0b70cd3b697..06d787385ad4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -486,11 +486,6 @@ static int vce_v4_0_sw_init(void *handle) return r; } - - r = amdgpu_vce_entity_init(adev); - if (r) - return r; - r = amdgpu_virt_alloc_mm_table(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 0e792a8496d6..cd8e459201f1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1404,6 +1404,66 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, return i; } +static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev, + struct kfd_gpu_cache_info *pcache_info) +{ + struct amdgpu_device *adev = kdev->adev; + int i = 0; + + /* TCP L1 Cache per CU */ + if (adev->gfx.config.gc_tcp_size_per_cu) { + pcache_info[i].cache_size = adev->gfx.config.gc_tcp_size_per_cu; + pcache_info[i].cache_level = 
1; + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE); + pcache_info[i].num_cu_shared = 1; + i++; + } + /* Scalar L1 Instruction Cache per SQC */ + if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) { + pcache_info[i].cache_size = + adev->gfx.config.gc_l1_instruction_cache_size_per_sqc; + pcache_info[i].cache_level = 1; + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE); + pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc; + i++; + } + /* Scalar L1 Data Cache per SQC */ + if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) { + pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc; + pcache_info[i].cache_level = 1; + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE); + pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc; + i++; + } + /* L2 Data Cache per GPU (Total Tex Cache) */ + if (adev->gfx.config.gc_tcc_size) { + pcache_info[i].cache_size = adev->gfx.config.gc_tcc_size; + pcache_info[i].cache_level = 2; + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE); + pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; + i++; + } + /* L3 Data Cache per GPU */ + if (adev->gmc.mall_size) { + pcache_info[i].cache_size = adev->gmc.mall_size / 1024; + pcache_info[i].cache_level = 3; + pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE); + pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; + i++; + } + return i; +} + int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info) { int num_of_cache_types = 0; @@ -1461,10 +1521,14 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc num_of_cache_types = ARRAY_SIZE(vega20_cache_info); break; case IP_VERSION(9, 4, 2): - case IP_VERSION(9, 4, 3): *pcache_info = aldebaran_cache_info; num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info); break; + case IP_VERSION(9, 4, 3): + num_of_cache_types = + kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd, + *pcache_info); + break; case IP_VERSION(9, 1, 0): case IP_VERSION(9, 2, 2): *pcache_info = raven_cache_info; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index fbf053001af9..7a33e06f5c90 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1416,8 +1416,13 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported) * per-process XNACK mode selection. But let the dev->noretry * setting still influence the default XNACK mode. */ - if (supported && KFD_SUPPORT_XNACK_PER_PROCESS(dev)) + if (supported && KFD_SUPPORT_XNACK_PER_PROCESS(dev)) { + if (!amdgpu_sriov_xnack_support(dev->kfd->adev)) { + pr_debug("SRIOV platform xnack not supported\n"); + return false; + } continue; + } /* GFXv10 and later GPUs do not support shader preemption * during page faults. This can lead to poor QoS for queue diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index e67d06a42809..f2f3c338fd94 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1255,9 +1255,11 @@ svm_range_get_pte_flags(struct kfd_node *node, } break; case IP_VERSION(9, 4, 3): - mtype_local = amdgpu_mtype_local == 1 ? 
AMDGPU_VM_MTYPE_NC : - (amdgpu_mtype_local == 2 || ext_coherent ? - AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW); + if (ext_coherent) + mtype_local = node->adev->rev_id ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_UC; + else + mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : + amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; snoop = true; if (uncached) { mapping_flags |= AMDGPU_VM_MTYPE_UC; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index c1e10f42db28..057284bf50bb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1602,10 +1602,13 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, unsigned int cu_sibling_map_mask; int first_active_cu; int i, j, k, xcc, start, end; + int num_xcc = NUM_XCC(knode->xcc_mask); struct kfd_cache_properties *pcache = NULL; + enum amdgpu_memory_partition mode; + struct amdgpu_device *adev = knode->adev; start = ffs(knode->xcc_mask) - 1; - end = start + NUM_XCC(knode->xcc_mask); + end = start + num_xcc; cu_sibling_map_mask = cu_info->bitmap[start][0][0]; cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); @@ -1624,7 +1627,18 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, pcache->processor_id_low = cu_processor_id + (first_active_cu - 1); pcache->cache_level = pcache_info[cache_type].cache_level; - pcache->cache_size = pcache_info[cache_type].cache_size; + + if (KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 3)) + mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + else + mode = UNKNOWN_MEMORY_PARTITION_MODE; + + if (pcache->cache_level == 2) + pcache->cache_size = pcache_info[cache_type].cache_size * num_xcc; + else if (mode) + pcache->cache_size = pcache_info[cache_type].cache_size / mode; + else + pcache->cache_size = pcache_info[cache_type].cache_size; if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE) pcache->cache_type |= HSA_CACHE_TYPE_DATA; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 9cd83b780102..ed784cf27d39 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -1216,6 +1216,9 @@ bool dm_helpers_dp_handle_test_pattern_request( } + pipe_ctx->stream->test_pattern.type = test_pattern; + pipe_ctx->stream->test_pattern.color_space = test_pattern_color_space; + dc_link_dp_set_test_pattern( (struct dc_link *) link, test_pattern, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h index 0f580ea37576..133af994a08c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h @@ -37,7 +37,7 @@ #include <drm/drm_framebuffer.h> #include <drm/drm_encoder.h> #include <drm/drm_atomic.h> -#include "dcn10/dcn10_optc.h" +#include "dc/inc/hw/optc.h" #include "dc/inc/core_types.h" diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index f80917f6153b..0fa4fcd00de2 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -111,17 +111,21 @@ static int dcn35_get_active_display_cnt_wa( return display_count; } -static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, 
struct dc_state *context, bool disable) +static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, + bool safe_to_lower, bool disable) { struct dc *dc = clk_mgr_base->ctx->dc; int i; for (i = 0; i < dc->res_pool->pipe_count; ++i) { - struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *pipe = safe_to_lower + ? &context->res_ctx.pipe_ctx[i] + : &dc->current_state->res_ctx.pipe_ctx[i]; if (pipe->top_pipe || pipe->prev_odm_pipe) continue; - if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) { + if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) || + !pipe->stream->link_enc)) { struct stream_encoder *stream_enc = pipe->stream_res.stream_enc; if (disable) { @@ -301,11 +305,11 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, } if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { - dcn35_disable_otg_wa(clk_mgr_base, context, true); + dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; dcn35_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); - dcn35_disable_otg_wa(clk_mgr_base, context, false); + dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); update_dispclk = true; } @@ -814,7 +818,8 @@ static void dcn35_set_idle_state(struct clk_mgr *clk_mgr_base, bool allow_idle) struct dc *dc = clk_mgr_base->ctx->dc; uint32_t val = dcn35_smu_read_ips_scratch(clk_mgr); - if (dc->config.disable_ips == 0) { + if (dc->config.disable_ips == DMUB_IPS_ENABLE || + dc->config.disable_ips == DMUB_IPS_DISABLE_DYNAMIC) { val |= DMUB_IPS1_ALLOW_MASK; val |= DMUB_IPS2_ALLOW_MASK; } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS1) { @@ -1114,7 +1119,7 @@ void dcn35_clk_mgr_construct( dm_helpers_free_gpu_mem(clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_FRAME_BUFFER, smu_dpm_clks.dpm_clks); - if (ctx->dc->config.disable_ips == 0) { + if (ctx->dc->config.disable_ips != DMUB_IPS_DISABLE_ALL) { bool ips_support = false; /*avoid call pmfw at init*/ @@ -1127,7 +1132,7 @@ void dcn35_clk_mgr_construct( ctx->dc->debug.disable_hpo_power_gate = false; } else { /*let's reset the config control flag*/ - ctx->dc->config.disable_ips = 1; /*pmfw not support it, disable it all*/ + ctx->dc->config.disable_ips = DMUB_IPS_DISABLE_ALL; /*pmfw not support it, disable it all*/ } } } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 74c21d98b4de..7b9bf5cb4529 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2582,6 +2582,9 @@ static enum surface_update_type det_surface_update(const struct dc *dc, if (u->gamut_remap_matrix) update_flags->bits.gamut_remap_change = 1; + if (u->blend_tf) + update_flags->bits.gamma_change = 1; + if (u->gamma) { enum surface_pixel_format format = SURFACE_PIXEL_FORMAT_GRPH_BEGIN; @@ -4113,8 +4116,17 @@ static bool commit_minimal_transition_state_for_windowed_mpo_odm(struct dc *dc, bool success = false; struct dc_state *minimal_transition_context; struct pipe_split_policy_backup policy; + struct mall_temp_config mall_temp_config; /* commit based on new context */ + /* Since all phantom pipes are removed in full validation, + * we have to save and restore the subvp/mall config when + * we do a minimal transition since the flags marking the + * pipe as subvp/phantom will be cleared (dc copy constructor + * creates a shallow 
copy). + */ + if (dc->res_pool->funcs->save_mall_state) + dc->res_pool->funcs->save_mall_state(dc, context, &mall_temp_config); minimal_transition_context = create_minimal_transition_state(dc, context, &policy); if (minimal_transition_context) { @@ -4123,9 +4135,20 @@ static bool commit_minimal_transition_state_for_windowed_mpo_odm(struct dc *dc, dc->hwss.is_pipe_topology_transition_seamless( dc, minimal_transition_context, context)) { DC_LOG_DC("%s base = new state\n", __func__); + success = dc_commit_state_no_check(dc, minimal_transition_context) == DC_OK; } release_minimal_transition_state(dc, minimal_transition_context, &policy); + if (dc->res_pool->funcs->restore_mall_state) + dc->res_pool->funcs->restore_mall_state(dc, context, &mall_temp_config); + /* If we do a minimal transition with plane removal and the context + * has subvp we also have to retain back the phantom stream / planes + * since the refcount is decremented as part of the min transition + * (we commit a state with no subvp, so the phantom streams / planes + * had to be removed). + */ + if (dc->res_pool->funcs->retain_phantom_pipes) + dc->res_pool->funcs->retain_phantom_pipes(dc, context); } if (!success) { @@ -4348,7 +4371,6 @@ static bool full_update_required(struct dc *dc, srf_updates[i].in_transfer_func || srf_updates[i].func_shaper || srf_updates[i].lut3d_func || - srf_updates[i].blend_tf || srf_updates[i].surface->force_full_update || (srf_updates[i].flip_addr && srf_updates[i].flip_addr->address.tmz_surface != srf_updates[i].surface->address.tmz_surface) || @@ -4885,7 +4907,7 @@ void dc_allow_idle_optimizations(struct dc *dc, bool allow) if (dc->debug.disable_idle_power_optimizations) return; - if (dc->caps.ips_support && dc->config.disable_ips) + if (dc->caps.ips_support && (dc->config.disable_ips == DMUB_IPS_DISABLE_ALL)) return; if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->is_smu_present) @@ -4906,7 +4928,7 @@ bool dc_dmub_is_ips_idle_state(struct dc *dc) if (dc->debug.disable_idle_power_optimizations) return false; - if (!dc->caps.ips_support || dc->config.disable_ips) + if (!dc->caps.ips_support || (dc->config.disable_ips == DMUB_IPS_DISABLE_ALL)) return false; if (dc->hwss.get_idle_state) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 6ed40b6c6178..4bdf105d1d71 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -533,7 +533,7 @@ uint32_t dc_stream_get_vblank_counter(const struct dc_stream_state *stream) for (i = 0; i < MAX_PIPES; i++) { struct timing_generator *tg = res_ctx->pipe_ctx[i].stream_res.tg; - if (res_ctx->pipe_ctx[i].stream != stream) + if (res_ctx->pipe_ctx[i].stream != stream || !tg) continue; return tg->funcs->get_frame_count(tg); @@ -592,7 +592,7 @@ bool dc_stream_get_scanoutpos(const struct dc_stream_state *stream, for (i = 0; i < MAX_PIPES; i++) { struct timing_generator *tg = res_ctx->pipe_ctx[i].stream_res.tg; - if (res_ctx->pipe_ctx[i].stream != stream) + if (res_ctx->pipe_ctx[i].stream != stream || !tg) continue; tg->funcs->get_scanoutpos(tg, diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 6e54ca055fcb..9316b737a8ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -49,7 +49,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.256" +#define DC_VER "3.2.259" #define MAX_SURFACES 3 #define MAX_PLANES 6 diff --git 
a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index ba142bef626b..e4c007203318 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -120,6 +120,80 @@ void dc_dmub_srv_send_inbox0_cmd(struct dc_dmub_srv *dc_dmub_srv, } } +bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, + unsigned int count, + union dmub_rb_cmd *cmd_list) +{ + struct dc_context *dc_ctx; + struct dmub_srv *dmub; + enum dmub_status status; + int i; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return false; + + /* only dereference dc_dmub_srv after the NULL check above */ + dc_ctx = dc_dmub_srv->ctx; + dmub = dc_dmub_srv->dmub; + + for (i = 0 ; i < count; i++) { + // Queue command + status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); + + if (status == DMUB_STATUS_QUEUE_FULL) { + /* Execute and wait for queue to become empty again. */ + dmub_srv_cmd_execute(dmub); + dmub_srv_wait_for_idle(dmub, 100000); + + /* Requeue the command. */ + status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); + } + + if (status != DMUB_STATUS_OK) { + DC_ERROR("Error queueing DMUB command: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + return false; + } + } + + status = dmub_srv_cmd_execute(dmub); + if (status != DMUB_STATUS_OK) { + DC_ERROR("Error starting DMUB execution: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + return false; + } + + return true; +} + +bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv, + enum dm_dmub_wait_type wait_type, + union dmub_rb_cmd *cmd_list) +{ + struct dmub_srv *dmub; + enum dmub_status status; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return false; + + dmub = dc_dmub_srv->dmub; + + // Wait for DMUB to process command + if (wait_type != DM_DMUB_WAIT_TYPE_NO_WAIT) { + status = dmub_srv_wait_for_idle(dmub, 100000); + + if (status != DMUB_STATUS_OK) { + DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + return false; + } + + // Copy data back from ring buffer into command + if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) + dmub_rb_get_return_data(&dmub->inbox1_rb, cmd_list); + } + + return true; +} + bool dc_dmub_srv_cmd_run(struct dc_dmub_srv *dc_dmub_srv, union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type) { return dc_dmub_srv_cmd_run_list(dc_dmub_srv, 1, cmd, wait_type); diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index 31150b214394..d4a60f53faab 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -56,6 +56,14 @@ void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv); bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv); +bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, + unsigned int count, + union dmub_rb_cmd *cmd_list); + +bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv, + enum dm_dmub_wait_type wait_type, + union dmub_rb_cmd *cmd_list); + bool dc_dmub_srv_cmd_run(struct dc_dmub_srv *dc_dmub_srv, union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type); bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int count, union dmub_rb_cmd *cmd_list, enum dm_dmub_wait_type wait_type); diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 35ae245ef722..eeeeeef4d717 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ 
-142,7 +142,8 @@ enum dp_test_link_rate { DP_TEST_LINK_RATE_HBR3 = 0x1E, DP_TEST_LINK_RATE_UHBR10 = 0x01, DP_TEST_LINK_RATE_UHBR20 = 0x02, - DP_TEST_LINK_RATE_UHBR13_5 = 0x03, + DP_TEST_LINK_RATE_UHBR13_5_LEGACY = 0x03, /* For backward compatibility */ + DP_TEST_LINK_RATE_UHBR13_5 = 0x04, }; struct dc_link_settings { diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 40dc51853d62..cea666ea66c6 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1037,7 +1037,9 @@ struct replay_config { bool replay_smu_opt_supported; // SMU optimization is supported unsigned int replay_enable_option; // Replay enablement option uint32_t debug_flags; // Replay debug flags - bool replay_timing_sync_supported; // Replay desync is supported + bool replay_timing_sync_supported; // Replay desync is supported + bool force_disable_desync_error_check; // Force disable Replay desync error check + bool received_desync_error_hpd; // Replay received desync error HPD union replay_error_status replay_error_status; // Replay error status }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h index c50aa30614be..051e4c2b4cf2 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h @@ -128,21 +128,6 @@ SRI(DC_ABM1_ACE_THRES_12, ABM, id), \ NBIO_SR(BIOS_SCRATCH_2) -#define ABM_DCN32_REG_LIST(id)\ - SRI(DC_ABM1_HG_SAMPLE_RATE, ABM, id), \ - SRI(DC_ABM1_LS_SAMPLE_RATE, ABM, id), \ - SRI(BL1_PWM_BL_UPDATE_SAMPLE_RATE, ABM, id), \ - SRI(DC_ABM1_HG_MISC_CTRL, ABM, id), \ - SRI(DC_ABM1_IPCSC_COEFF_SEL, ABM, id), \ - SRI(BL1_PWM_CURRENT_ABM_LEVEL, ABM, id), \ - SRI(BL1_PWM_TARGET_ABM_LEVEL, ABM, id), \ - SRI(BL1_PWM_USER_LEVEL, ABM, id), \ - SRI(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES, ABM, id), \ - SRI(DC_ABM1_HGLS_REG_READ_PROGRESS, ABM, id), \ - SRI(DC_ABM1_ACE_OFFSET_SLOPE_0, ABM, id), \ - SRI(DC_ABM1_ACE_THRES_12, ABM, id), \ - NBIO_SR(BIOS_SCRATCH_2) - #define ABM_SF(reg_name, field_name, post_fix)\ .field_name = reg_name ## __ ## field_name ## post_fix diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h index aaf6c981fd9e..ab81594a7fad 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h @@ -26,7 +26,7 @@ #ifndef __DC_TIMING_GENERATOR_DCN10_H__ #define __DC_TIMING_GENERATOR_DCN10_H__ -#include "timing_generator.h" +#include "optc.h" #define DCN10TG_FROM_TG(tg)\ container_of(tg, struct optc, base) @@ -594,190 +594,6 @@ struct dcn_optc_mask { TG_REG_FIELD_LIST_DCN3_5(uint32_t) }; -struct optc { - struct timing_generator base; - - const struct dcn_optc_registers *tg_regs; - const struct dcn_optc_shift *tg_shift; - const struct dcn_optc_mask *tg_mask; - - int opp_count; - - uint32_t max_h_total; - uint32_t max_v_total; - - uint32_t min_h_blank; - - uint32_t min_h_sync_width; - uint32_t min_v_sync_width; - uint32_t min_v_blank; - uint32_t min_v_blank_interlace; - - int vstartup_start; - int vupdate_offset; - int vupdate_width; - int vready_offset; - struct dc_crtc_timing orginal_patched_timing; - enum signal_type signal; -}; - void dcn10_timing_generator_init(struct optc *optc); -struct dcn_otg_state { - uint32_t v_blank_start; - uint32_t v_blank_end; - uint32_t v_sync_a_pol; - uint32_t v_total; - uint32_t v_total_max; - uint32_t v_total_min; - uint32_t v_total_min_sel; - uint32_t v_total_max_sel; - uint32_t 
v_sync_a_start; - uint32_t v_sync_a_end; - uint32_t h_blank_start; - uint32_t h_blank_end; - uint32_t h_sync_a_start; - uint32_t h_sync_a_end; - uint32_t h_sync_a_pol; - uint32_t h_total; - uint32_t underflow_occurred_status; - uint32_t otg_enabled; - uint32_t blank_enabled; - uint32_t vertical_interrupt1_en; - uint32_t vertical_interrupt1_line; - uint32_t vertical_interrupt2_en; - uint32_t vertical_interrupt2_line; -}; - -void optc1_read_otg_state(struct optc *optc1, - struct dcn_otg_state *s); - -bool optc1_get_hw_timing(struct timing_generator *tg, - struct dc_crtc_timing *hw_crtc_timing); - -bool optc1_validate_timing( - struct timing_generator *optc, - const struct dc_crtc_timing *timing); - -void optc1_program_timing( - struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing, - int vready_offset, - int vstartup_start, - int vupdate_offset, - int vupdate_width, - const enum signal_type signal, - bool use_vbios); - -void optc1_setup_vertical_interrupt0( - struct timing_generator *optc, - uint32_t start_line, - uint32_t end_line); -void optc1_setup_vertical_interrupt1( - struct timing_generator *optc, - uint32_t start_line); -void optc1_setup_vertical_interrupt2( - struct timing_generator *optc, - uint32_t start_line); - -void optc1_program_global_sync( - struct timing_generator *optc, - int vready_offset, - int vstartup_start, - int vupdate_offset, - int vupdate_width); - -bool optc1_disable_crtc(struct timing_generator *optc); - -bool optc1_is_counter_moving(struct timing_generator *optc); - -void optc1_get_position(struct timing_generator *optc, - struct crtc_position *position); - -uint32_t optc1_get_vblank_counter(struct timing_generator *optc); - -void optc1_get_crtc_scanoutpos( - struct timing_generator *optc, - uint32_t *v_blank_start, - uint32_t *v_blank_end, - uint32_t *h_position, - uint32_t *v_position); - -void optc1_set_early_control( - struct timing_generator *optc, - uint32_t early_cntl); - -void optc1_wait_for_state(struct timing_generator *optc, - enum crtc_state state); - -void optc1_set_blank(struct timing_generator *optc, - bool enable_blanking); - -bool optc1_is_blanked(struct timing_generator *optc); - -void optc1_program_blank_color( - struct timing_generator *optc, - const struct tg_color *black_color); - -bool optc1_did_triggered_reset_occur( - struct timing_generator *optc); - -void optc1_enable_reset_trigger(struct timing_generator *optc, int source_tg_inst); - -void optc1_disable_reset_trigger(struct timing_generator *optc); - -void optc1_lock(struct timing_generator *optc); - -void optc1_unlock(struct timing_generator *optc); - -void optc1_enable_optc_clock(struct timing_generator *optc, bool enable); - -void optc1_set_drr( - struct timing_generator *optc, - const struct drr_params *params); - -void optc1_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max); - -void optc1_set_static_screen_control( - struct timing_generator *optc, - uint32_t event_triggers, - uint32_t num_frames); - -void optc1_program_stereo(struct timing_generator *optc, - const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags); - -bool optc1_is_stereo_left_eye(struct timing_generator *optc); - -void optc1_clear_optc_underflow(struct timing_generator *optc); - -void optc1_tg_init(struct timing_generator *optc); - -bool optc1_is_tg_enabled(struct timing_generator *optc); - -bool optc1_is_optc_underflow_occurred(struct timing_generator *optc); - -void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool 
enable); - -void optc1_set_timing_double_buffer(struct timing_generator *optc, bool enable); - -bool optc1_get_otg_active_size(struct timing_generator *optc, - uint32_t *otg_active_width, - uint32_t *otg_active_height); - -void optc1_enable_crtc_reset( - struct timing_generator *optc, - int source_tg_inst, - struct crtc_trigger_info *crtc_tp); - -bool optc1_configure_crc(struct timing_generator *optc, - const struct crc_params *params); - -bool optc1_get_crc(struct timing_generator *optc, - uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb); - -bool optc1_is_two_pixels_per_containter(const struct dc_crtc_timing *timing); - -void optc1_set_vtg_params(struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing, bool program_fp2); - #endif /* __DC_TIMING_GENERATOR_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c index 5eebe7f03ddc..c9ae2d8f0096 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c @@ -137,7 +137,15 @@ void dsc2_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz) dsc_enc_caps->max_total_throughput_mps = DCN20_MAX_DISPLAY_CLOCK_Mhz * 2; } - // TODO DSC: This is actually image width limitation, not a slice width. This should be added to the criteria to use ODM. + /* For a pixel clock bigger than twice the single-pipe limit we need four engines (ODM 4:1), which then quadruples our + * throughput and number of slices + */ + if (pixel_clock_100Hz > DCN20_MAX_PIXEL_CLOCK_Mhz*10000*2) { + dsc_enc_caps->slice_caps.bits.NUM_SLICES_12 = 1; + dsc_enc_caps->slice_caps.bits.NUM_SLICES_16 = 1; + dsc_enc_caps->max_total_throughput_mps = DCN20_MAX_DISPLAY_CLOCK_Mhz * 4; + } + dsc_enc_caps->max_slice_width = 5184; /* (including 64 overlap pixels for eDP MSO mode) */ dsc_enc_caps->bpp_increment_div = 16; /* 1/16th of a bit */ }
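A quick note on the unit math in the DSC hunk above: DC carries pixel clocks in units of 100 Hz, so a limit expressed in MHz has to be scaled by 10000 before comparing it against pixel_clock_100Hz. The sketch below restates that check in a self-contained form; the 1188 MHz stand-in value is hypothetical, the real DCN20_MAX_PIXEL_CLOCK_Mhz is defined in the driver headers and may differ.

#include <stdbool.h>

#define MAX_PIXEL_CLOCK_MHZ 1188 /* hypothetical stand-in for DCN20_MAX_PIXEL_CLOCK_Mhz */

/* 1 MHz == 10000 units of 100 Hz, so scale the MHz limit before comparing */
static bool needs_odm_4to1(int pixel_clock_100hz)
{
	/* beyond twice the single-pipe limit, four DSC engines are combined */
	return pixel_clock_100hz > MAX_PIXEL_CLOCK_MHZ * 10000 * 2;
}

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c index 50dc83404644..11f7746f3a65 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c @@ -613,16 +613,19 @@ static void dpp3_program_blnd_pwl( REG_SET(CM_BLNDGAM_LUT_DATA, 0, CM_BLNDGAM_LUT_DATA, rgb[i].red_reg); REG_SET(CM_BLNDGAM_LUT_DATA, 0, CM_BLNDGAM_LUT_DATA, last_base_value_red); } else { + REG_SET(CM_BLNDGAM_LUT_INDEX, 0, CM_BLNDGAM_LUT_INDEX, 0); REG_UPDATE(CM_BLNDGAM_LUT_CONTROL, CM_BLNDGAM_LUT_WRITE_COLOR_MASK, 4); for (i = 0 ; i < num; i++) REG_SET(CM_BLNDGAM_LUT_DATA, 0, CM_BLNDGAM_LUT_DATA, rgb[i].red_reg); REG_SET(CM_BLNDGAM_LUT_DATA, 0, CM_BLNDGAM_LUT_DATA, last_base_value_red); + REG_SET(CM_BLNDGAM_LUT_INDEX, 0, CM_BLNDGAM_LUT_INDEX, 0); REG_UPDATE(CM_BLNDGAM_LUT_CONTROL, CM_BLNDGAM_LUT_WRITE_COLOR_MASK, 2); for (i = 0 ; i < num; i++) REG_SET(CM_BLNDGAM_LUT_DATA, 0, CM_BLNDGAM_LUT_DATA, rgb[i].green_reg); REG_SET(CM_BLNDGAM_LUT_DATA, 0, CM_BLNDGAM_LUT_DATA, last_base_value_green); + REG_SET(CM_BLNDGAM_LUT_INDEX, 0, CM_BLNDGAM_LUT_INDEX, 0); REG_UPDATE(CM_BLNDGAM_LUT_CONTROL, CM_BLNDGAM_LUT_WRITE_COLOR_MASK, 1); for (i = 0 ; i < num; i++) REG_SET(CM_BLNDGAM_LUT_DATA, 0, CM_BLNDGAM_LUT_DATA, rgb[i].blue_reg);

The CM_BLNDGAM_LUT_INDEX writes added above (and the matching MPCC_MCM_1DLUT_LUT_INDEX writes in the dcn32_mpc.c hunk below) follow the usual auto-increment RAM port pattern: every write to the data register advances an internal index, so the index must be rewound to 0 before each per-channel pass or the green and blue passes land at the wrong offsets. A minimal sketch of that pattern, with illustrative register names rather than the real DCN ones:

#include <stdint.h>

struct lut_regs { volatile uint32_t index, control_mask, data; };

static void program_channel(struct lut_regs *regs, const uint32_t *vals,
			    int num, uint32_t color_mask)
{
	regs->index = 0;                 /* rewind the auto-increment write pointer */
	regs->control_mask = color_mask; /* e.g. 4 = red, 2 = green, 1 = blue */
	for (int i = 0; i < num; i++)
		regs->data = vals[i];    /* each data write advances the index */
}

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c index 2861d974fcf6..75547ce86c09 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c @@ -316,7 +316,7 @@ bool 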
hubp3_program_surface_flip_and_addr( return true; } -static void hubp3_program_tiling( +void hubp3_program_tiling( struct dcn20_hubp *hubp2, const union dc_tiling_info *info, const enum surface_pixel_format pixel_format) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.h index 8a32772d4e91..b010531a7fe8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.h @@ -278,6 +278,11 @@ void hubp3_setup( struct _vcs_dpi_display_rq_regs_st *rq_regs, struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest); +void hubp3_program_tiling( + struct dcn20_hubp *hubp2, + const union dc_tiling_info *info, + const enum surface_pixel_format pixel_format); + void hubp3_dcc_control(struct hubp *hubp, bool enable, enum hubp_ind_block_size blk_size); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c index 1d052f08aff5..994b21ed272f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c @@ -237,16 +237,19 @@ void mpc32_program_post1dlut_pwl( REG_SET(MPCC_MCM_1DLUT_LUT_DATA[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_DATA, rgb[i].red_reg); REG_SET(MPCC_MCM_1DLUT_LUT_DATA[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_DATA, last_base_value_red); } else { + REG_SET(MPCC_MCM_1DLUT_LUT_INDEX[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_INDEX, 0); REG_UPDATE(MPCC_MCM_1DLUT_LUT_CONTROL[mpcc_id], MPCC_MCM_1DLUT_LUT_WRITE_COLOR_MASK, 4); for (i = 0 ; i < num; i++) REG_SET(MPCC_MCM_1DLUT_LUT_DATA[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_DATA, rgb[i].red_reg); REG_SET(MPCC_MCM_1DLUT_LUT_DATA[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_DATA, last_base_value_red); + REG_SET(MPCC_MCM_1DLUT_LUT_INDEX[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_INDEX, 0); REG_UPDATE(MPCC_MCM_1DLUT_LUT_CONTROL[mpcc_id], MPCC_MCM_1DLUT_LUT_WRITE_COLOR_MASK, 2); for (i = 0 ; i < num; i++) REG_SET(MPCC_MCM_1DLUT_LUT_DATA[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_DATA, rgb[i].green_reg); REG_SET(MPCC_MCM_1DLUT_LUT_DATA[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_DATA, last_base_value_green); + REG_SET(MPCC_MCM_1DLUT_LUT_INDEX[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_INDEX, 0); REG_UPDATE(MPCC_MCM_1DLUT_LUT_CONTROL[mpcc_id], MPCC_MCM_1DLUT_LUT_WRITE_COLOR_MASK, 1); for (i = 0 ; i < num; i++) REG_SET(MPCC_MCM_1DLUT_LUT_DATA[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_DATA, rgb[i].blue_reg); diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c index addedcfd1238..479f3683c0b7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c @@ -325,6 +325,43 @@ static void dccg35_set_dpstreamclk( } } +static void dccg35_set_physymclk_root_clock_gating( + struct dccg *dccg, + int phy_inst, + bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + return; + + switch (phy_inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYASYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYBSYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYCSYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYDSYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0); + break; + case 4: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYESYMCLK_ROOT_GATE_DISABLE, enable ? 
1 : 0); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + static void dccg35_set_physymclk( struct dccg *dccg, int phy_inst, @@ -340,16 +377,10 @@ static void dccg35_set_physymclk( REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_EN, 1, PHYASYMCLK_SRC_SEL, clk_src); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYASYMCLK_ROOT_GATE_DISABLE, 1); } else { REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_EN, 0, PHYASYMCLK_SRC_SEL, 0); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYASYMCLK_ROOT_GATE_DISABLE, 0); } break; case 1: @@ -357,16 +388,10 @@ static void dccg35_set_physymclk( REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_EN, 1, PHYBSYMCLK_SRC_SEL, clk_src); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYBSYMCLK_ROOT_GATE_DISABLE, 1); } else { REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_EN, 0, PHYBSYMCLK_SRC_SEL, 0); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYBSYMCLK_ROOT_GATE_DISABLE, 0); } break; case 2: @@ -374,16 +399,10 @@ static void dccg35_set_physymclk( REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_EN, 1, PHYCSYMCLK_SRC_SEL, clk_src); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYCSYMCLK_ROOT_GATE_DISABLE, 1); } else { REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_EN, 0, PHYCSYMCLK_SRC_SEL, 0); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYCSYMCLK_ROOT_GATE_DISABLE, 0); } break; case 3: @@ -391,16 +410,10 @@ static void dccg35_set_physymclk( REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_EN, 1, PHYDSYMCLK_SRC_SEL, clk_src); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYDSYMCLK_ROOT_GATE_DISABLE, 1); } else { REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_EN, 0, PHYDSYMCLK_SRC_SEL, 0); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYDSYMCLK_ROOT_GATE_DISABLE, 0); } break; case 4: @@ -408,16 +421,10 @@ static void dccg35_set_physymclk( REG_UPDATE_2(PHYESYMCLK_CLOCK_CNTL, PHYESYMCLK_EN, 1, PHYESYMCLK_SRC_SEL, clk_src); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYESYMCLK_ROOT_GATE_DISABLE, 1); } else { REG_UPDATE_2(PHYESYMCLK_CLOCK_CNTL, PHYESYMCLK_EN, 0, PHYESYMCLK_SRC_SEL, 0); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYESYMCLK_ROOT_GATE_DISABLE, 0); } break; default: @@ -490,8 +497,8 @@ void dccg35_init(struct dccg *dccg) if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) for (otg_inst = 0; otg_inst < 5; otg_inst++) - dccg35_set_physymclk(dccg, otg_inst, - PHYSYMCLK_FORCE_SRC_SYMCLK, false); + dccg35_set_physymclk_root_clock_gating(dccg, otg_inst, + false); /* dccg35_enable_global_fgcg_rep( dccg, dccg->ctx->dc->debug.enable_fine_grain_clock_gating.bits @@ -754,7 +761,9 @@ static const struct dccg_funcs dccg35_funcs = { .disable_symclk32_se = dccg31_disable_symclk32_se, .enable_symclk32_le = dccg31_enable_symclk32_le, .disable_symclk32_le = dccg31_disable_symclk32_le, + .set_symclk32_le_root_clock_gating = dccg31_set_symclk32_le_root_clock_gating, .set_physymclk = dccg35_set_physymclk, + .set_physymclk_root_clock_gating = 
dccg35_set_physymclk_root_clock_gating, .set_dtbclk_dto = dccg35_set_dtbclk_dto, .set_audio_dtbclk_dto = dccg31_set_audio_dtbclk_dto, .set_fifo_errdet_ovr_en = dccg2_set_fifo_errdet_ovr_en, diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_hubp.c index 1ed58660779e..771fcd0d3b99 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_hubp.c @@ -53,11 +53,146 @@ static void hubp35_init(struct hubp *hubp) /*do nothing for now for dcn3.5 or later*/ } + +void hubp35_program_pixel_format( + struct hubp *hubp, + enum surface_pixel_format format) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + uint32_t green_bar = 1; + uint32_t red_bar = 3; + uint32_t blue_bar = 2; + + /* swap for ABGR format */ + if (format == SURFACE_PIXEL_FORMAT_GRPH_ABGR8888 + || format == SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010 + || format == SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010_XR_BIAS + || format == SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616 + || format == SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F) { + red_bar = 2; + blue_bar = 3; + } + + REG_UPDATE_3(HUBPRET_CONTROL, + CROSSBAR_SRC_Y_G, green_bar, + CROSSBAR_SRC_CB_B, blue_bar, + CROSSBAR_SRC_CR_R, red_bar); + + /* Mapping is same as ipp programming (cnvc) */ + + switch (format) { + case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 1); + break; + case SURFACE_PIXEL_FORMAT_GRPH_RGB565: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 3); + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 8); + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010_XR_BIAS: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 10); + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: /* we use crossbar already */ + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 26); /* ARGB16161616_UNORM */ + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:/*we use crossbar already*/ + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 24); + break; + + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 65); + break; + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 64); + break; + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 67); + break; + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 66); + break; + case SURFACE_PIXEL_FORMAT_VIDEO_AYCrCb8888: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 12); + break; + case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FIX: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 112); + break; + case SURFACE_PIXEL_FORMAT_GRPH_BGR101111_FIX: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 113); + break; + case SURFACE_PIXEL_FORMAT_VIDEO_ACrYCb2101010: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 114); + break; + case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FLOAT: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 118); + break; + case SURFACE_PIXEL_FORMAT_GRPH_BGR101111_FLOAT: + REG_UPDATE(DCSURF_SURFACE_CONFIG, + 
SURFACE_PIXEL_FORMAT, 119); + break; + case SURFACE_PIXEL_FORMAT_GRPH_RGBE: + REG_UPDATE_2(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 116, + ALPHA_PLANE_EN, 0); + break; + case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA: + REG_UPDATE_2(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, 116, + ALPHA_PLANE_EN, 1); + break; + default: + BREAK_TO_DEBUGGER(); + break; + } + + /* don't see the need to program the xbar in DCN 1.0 */ +} + +void hubp35_program_surface_config( + struct hubp *hubp, + enum surface_pixel_format format, + union dc_tiling_info *tiling_info, + struct plane_size *plane_size, + enum dc_rotation_angle rotation, + struct dc_plane_dcc_param *dcc, + bool horizontal_mirror, + unsigned int compat_level) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + hubp3_dcc_control_sienna_cichlid(hubp, dcc); + hubp3_program_tiling(hubp2, tiling_info, format); + hubp2_program_size(hubp, format, plane_size, dcc); + hubp2_program_rotation(hubp, rotation, horizontal_mirror); + hubp35_program_pixel_format(hubp, format); +} + struct hubp_funcs dcn35_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled, .hubp_program_surface_flip_and_addr = hubp3_program_surface_flip_and_addr, - .hubp_program_surface_config = hubp3_program_surface_config, + .hubp_program_surface_config = hubp35_program_surface_config, .hubp_is_flip_pending = hubp2_is_flip_pending, .hubp_setup = hubp3_setup, .hubp_setup_interdependent = hubp2_setup_interdependent, diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_hubp.h index 3d830f93141e..586b43aa5834 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_hubp.h @@ -58,4 +58,18 @@ bool hubp35_construct( void hubp35_set_fgcg(struct hubp *hubp, bool enable); +void hubp35_program_pixel_format( + struct hubp *hubp, + enum surface_pixel_format format); + +void hubp35_program_surface_config( + struct hubp *hubp, + enum surface_pixel_format format, + union dc_tiling_info *tiling_info, + struct plane_size *plane_size, + enum dc_rotation_angle rotation, + struct dc_plane_dcc_param *dcc, + bool horizontal_mirror, + unsigned int compat_level); + #endif /* __DC_HUBP_DCN35_H__ */
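The dcn35_hubp_funcs table above shows the hook-table composition style DC uses: DCN3.5 keeps the shared hubp2/hubp3 helpers for most operations and swaps in only .hubp_program_surface_config, which itself is a thin sequence of shared helpers plus the DCN3.5 pixel-format programmer. A minimal sketch of the idea with hypothetical, trimmed-down types (the real struct hubp_funcs has many more hooks):

struct hubp; /* opaque in this sketch */

struct hubp_ops {
	void (*program_surface_config)(struct hubp *hubp);
	/* ... many more hooks in the real table ... */
};

static void gen3_program_surface_config(struct hubp *hubp)  { /* shared path */ }
static void gen35_program_surface_config(struct hubp *hubp) { /* new path */ }

/* each generation provides the same table shape, overriding only what changed */
static const struct hubp_ops gen3_ops = {
	.program_surface_config = gen3_program_surface_config,
};

static const struct hubp_ops gen35_ops = {
	.program_surface_config = gen35_program_surface_config,
};

diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c index 0f60c40e1fc5..46f71ff08fd1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c @@ -332,6 +332,13 @@ void pg_cntl35_io_clk_pg_control(struct pg_cntl *pg_cntl, bool power_on) pg_cntl->pg_res_enable[PG_DCIO] = power_on; } +void pg_cntl35_set_force_poweron_domain22(struct pg_cntl *pg_cntl, bool power_on) +{ + struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl); + + REG_UPDATE(DOMAIN22_PG_CONFIG, DOMAIN_POWER_FORCEON, power_on ? 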
1 : 0); +} + static bool pg_cntl35_plane_otg_status(struct pg_cntl *pg_cntl) { struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl); @@ -501,7 +508,8 @@ static const struct pg_cntl_funcs pg_cntl35_funcs = { .mpcc_pg_control = pg_cntl35_mpcc_pg_control, .opp_pg_control = pg_cntl35_opp_pg_control, .optc_pg_control = pg_cntl35_optc_pg_control, - .dwb_pg_control = pg_cntl35_dwb_pg_control + .dwb_pg_control = pg_cntl35_dwb_pg_control, + .set_force_poweron_domain22 = pg_cntl35_set_force_poweron_domain22 }; struct pg_cntl *pg_cntl35_create( diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h index 3de240884d22..069dae08e222 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h @@ -183,6 +183,7 @@ void pg_cntl35_optc_pg_control(struct pg_cntl *pg_cntl, unsigned int optc_inst, bool power_on); void pg_cntl35_dwb_pg_control(struct pg_cntl *pg_cntl, bool power_on); void pg_cntl35_init_pg_status(struct pg_cntl *pg_cntl); +void pg_cntl35_set_force_poweron_domain22(struct pg_cntl *pg_cntl, bool power_on); struct pg_cntl *pg_cntl35_create( struct dc_context *ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c index 3c7c810bab1f..c7e011d26d41 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c @@ -610,7 +610,23 @@ static struct dce_hwseq_registers hwseq_reg; HWS_SF(, DMU_CLK_CNTL, LONO_FGCG_REP_DIS, mask_sh),\ HWS_SF(, DMU_CLK_CNTL, LONO_DISPCLK_GATE_DISABLE, mask_sh),\ HWS_SF(, DMU_CLK_CNTL, LONO_SOCCLK_GATE_DISABLE, mask_sh),\ - HWS_SF(, DMU_CLK_CNTL, LONO_DMCUBCLK_GATE_DISABLE, mask_sh) + HWS_SF(, DMU_CLK_CNTL, LONO_DMCUBCLK_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKA_FE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKB_FE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKC_FE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKD_FE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKE_FE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, HDMICHARCLK0_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKA_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKB_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKC_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKD_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_ROOT_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_ROOT_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_ROOT_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_ROOT_GATE_DISABLE, mask_sh) static const struct dce_hwseq_shift hwseq_shift = { HWSEQ_DCN35_MASK_SH_LIST(__SHIFT) @@ -708,7 +724,7 @@ static const struct dc_debug_options debug_defaults_drv = { .i2c = true, .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled .dscl = true, - .cm = true, + .cm = false, .mpc = true, .optc = true, .vpg = true, @@ -719,14 +735,14 @@ static const struct dc_debug_options debug_defaults_drv = { .bits = { .dpp = true, .dsc = true,/*dscclk and dsc pg*/ - .hdmistream = false, - .hdmichar = 
false, - .dpstream = false, - .symclk32_se = false, - .symclk32_le = false, - .symclk_fe = false, - .physymclk = false, - .dpiasymclk = false, + .hdmistream = true, + .hdmichar = true, + .dpstream = true, + .symclk32_se = true, + .symclk32_le = true, + .symclk_fe = true, + .physymclk = true, + .dpiasymclk = true, } }, .seamless_boot_odm_combine = DML_FAIL_SOURCE_PIXEL_FORMAT, @@ -741,7 +757,6 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_boot_optimizations = false, .disable_unbounded_requesting = false, .disable_mem_low_power = false, - .enable_hpo_pg_support = false, //must match enable_single_display_2to1_odm_policy to support dynamic ODM transitions .enable_double_buffered_dsc_pg_support = true, .enable_dp_dig_pixel_rate_div_policy = 1, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index ad741a723c0e..3686f1e7de3a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -5128,7 +5128,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l ViewportExceedsSurface = true; if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 - && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8 && v->SourcePixelFormat[k] != dm_rgbe) { + && v->SourcePixelFormat[k] != dm_444_8 && v->SourcePixelFormat[k] != dm_rgbe) { if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) { ViewportExceedsSurface = true; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index 70ae5eba624e..acff3449b8d7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -60,8 +60,12 @@ endif endif ifneq ($(CONFIG_FRAME_WARN),0) +ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) +frame_warn_flag := -Wframe-larger-than=3072 +else frame_warn_flag := -Wframe-larger-than=2048 endif +endif CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_util.o := $(dml2_ccflags) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c index d2046e770c50..1a2b24cc6b61 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c @@ -55,10 +55,11 @@ struct dc_pipe_mapping_scratch { struct dc_plane_pipe_pool pipe_pool; }; -static bool get_plane_id(const struct dc_state *state, const struct dc_plane_state *plane, - unsigned int stream_id, unsigned int *plane_id) +static bool get_plane_id(struct dml2_context *dml2, const struct dc_state *state, const struct dc_plane_state *plane, + unsigned int stream_id, unsigned int plane_index, unsigned int *plane_id) { int i, j; + bool is_plane_duplicate = dml2->v20.scratch.plane_duplicate_exists; if (!plane_id) return false; @@ -66,7 +67,8 @@ static bool get_plane_id(const struct dc_state *state, const struct dc_plane_sta for (i = 0; i < state->stream_count; i++) { if (state->streams[i]->stream_id == stream_id) { for (j = 0; j < state->stream_status[i].plane_count; j++) { - if (state->stream_status[i].plane_states[j] == plane) { + if (state->stream_status[i].plane_states[j] == plane && + 
(!is_plane_duplicate || (is_plane_duplicate && (j == plane_index)))) { *plane_id = (i << 16) | j; return true; } @@ -123,8 +125,9 @@ static struct pipe_ctx *find_master_pipe_of_plane(struct dml2_context *ctx, unsigned int plane_id_assigned_to_pipe; for (i = 0; i < ctx->config.dcn_pipe_count; i++) { - if (state->res_ctx.pipe_ctx[i].plane_state && get_plane_id(state, state->res_ctx.pipe_ctx[i].plane_state, - state->res_ctx.pipe_ctx[i].stream->stream_id, &plane_id_assigned_to_pipe)) { + if (state->res_ctx.pipe_ctx[i].plane_state && get_plane_id(ctx, state, state->res_ctx.pipe_ctx[i].plane_state, + state->res_ctx.pipe_ctx[i].stream->stream_id, + ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[state->res_ctx.pipe_ctx[i].pipe_idx], &plane_id_assigned_to_pipe)) { if (plane_id_assigned_to_pipe == plane_id) return &state->res_ctx.pipe_ctx[i]; } @@ -141,8 +144,9 @@ static unsigned int find_pipes_assigned_to_plane(struct dml2_context *ctx, unsigned int plane_id_assigned_to_pipe; for (i = 0; i < ctx->config.dcn_pipe_count; i++) { - if (state->res_ctx.pipe_ctx[i].plane_state && get_plane_id(state, state->res_ctx.pipe_ctx[i].plane_state, - state->res_ctx.pipe_ctx[i].stream->stream_id, &plane_id_assigned_to_pipe)) { + if (state->res_ctx.pipe_ctx[i].plane_state && get_plane_id(ctx, state, state->res_ctx.pipe_ctx[i].plane_state, + state->res_ctx.pipe_ctx[i].stream->stream_id, + ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[state->res_ctx.pipe_ctx[i].pipe_idx], &plane_id_assigned_to_pipe)) { if (plane_id_assigned_to_pipe == plane_id) pipes[num_found++] = i; } @@ -609,6 +613,7 @@ static struct pipe_ctx *assign_pipes_to_plane(struct dml2_context *ctx, struct d const struct dc_plane_state *plane, int odm_factor, int mpc_factor, + int plane_index, struct dc_plane_pipe_pool *pipe_pool, const struct dc_state *existing_state) { @@ -620,7 +625,7 @@ static struct pipe_ctx *assign_pipes_to_plane(struct dml2_context *ctx, struct d unsigned int next_pipe_to_assign; int odm_slice, mpc_slice; - if (!get_plane_id(state, plane, stream->stream_id, &plane_id)) { + if (!get_plane_id(ctx, state, plane, stream->stream_id, plane_index, &plane_id)) { ASSERT(false); return master_pipe; } @@ -667,12 +672,16 @@ static void free_pipe(struct pipe_ctx *pipe) } static void free_unused_pipes_for_plane(struct dml2_context *ctx, struct dc_state *state, - const struct dc_plane_state *plane, const struct dc_plane_pipe_pool *pool, unsigned int stream_id) + const struct dc_plane_state *plane, const struct dc_plane_pipe_pool *pool, unsigned int stream_id, int plane_index) { int i; + bool is_plane_duplicate = ctx->v20.scratch.plane_duplicate_exists; + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { if (state->res_ctx.pipe_ctx[i].plane_state == plane && state->res_ctx.pipe_ctx[i].stream->stream_id == stream_id && + (!is_plane_duplicate || (is_plane_duplicate && + ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[state->res_ctx.pipe_ctx[i].pipe_idx] == plane_index)) && !is_pipe_used(pool, state->res_ctx.pipe_ctx[i].pipe_idx)) { free_pipe(&state->res_ctx.pipe_ctx[i]); } @@ -717,19 +726,20 @@ static void map_pipes_for_stream(struct dml2_context *ctx, struct dc_state *stat } static void map_pipes_for_plane(struct dml2_context *ctx, struct dc_state *state, const struct dc_stream_state *stream, const struct dc_plane_state *plane, - struct dc_pipe_mapping_scratch *scratch, const struct dc_state *existing_state) + int plane_index, struct dc_pipe_mapping_scratch *scratch, const struct dc_state 
*existing_state) { int odm_slice_index; unsigned int plane_id; struct pipe_ctx *master_pipe = NULL; int i; - if (!get_plane_id(state, plane, stream->stream_id, &plane_id)) { + if (!get_plane_id(ctx, state, plane, stream->stream_id, plane_index, &plane_id)) { ASSERT(false); return; } - master_pipe = assign_pipes_to_plane(ctx, state, stream, plane, scratch->odm_info.odm_factor, scratch->mpc_info.mpc_factor, &scratch->pipe_pool, existing_state); + master_pipe = assign_pipes_to_plane(ctx, state, stream, plane, scratch->odm_info.odm_factor, + scratch->mpc_info.mpc_factor, plane_index, &scratch->pipe_pool, existing_state); sort_pipes_for_splitting(&scratch->pipe_pool); for (odm_slice_index = 0; odm_slice_index < scratch->odm_info.odm_factor; odm_slice_index++) { @@ -755,7 +765,7 @@ static void map_pipes_for_plane(struct dml2_context *ctx, struct dc_state *state } } - free_unused_pipes_for_plane(ctx, state, plane, &scratch->pipe_pool, stream->stream_id); + free_unused_pipes_for_plane(ctx, state, plane, &scratch->pipe_pool, stream->stream_id, plane_index); } static unsigned int get_mpc_factor(struct dml2_context *ctx, @@ -768,7 +778,7 @@ static unsigned int get_mpc_factor(struct dml2_context *ctx, unsigned int plane_id; unsigned int cfg_idx; - get_plane_id(state, status->plane_states[plane_idx], stream_id, &plane_id); + get_plane_id(ctx, state, status->plane_states[plane_idx], stream_id, plane_idx, &plane_id); cfg_idx = find_disp_cfg_idx_by_plane_id(mapping, plane_id); if (ctx->architecture == dml2_architecture_20) return (unsigned int)disp_cfg->hw.DPPPerSurface[cfg_idx]; @@ -911,26 +921,14 @@ bool dml2_map_dc_pipes(struct dml2_context *ctx, struct dc_state *state, const s unsigned int stream_id; const unsigned int *ODMMode, *DPPPerSurface; - unsigned int odm_mode_array[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0}, dpp_per_surface_array[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0}; struct dc_pipe_mapping_scratch scratch; if (ctx->config.map_dc_pipes_with_callbacks) return map_dc_pipes_with_callbacks( ctx, state, disp_cfg, mapping, existing_state); - if (ctx->architecture == dml2_architecture_21) { - /* - * Extract ODM and DPP outputs from DML2.1 and map them in an array as required for pipe mapping in dml2_map_dc_pipes. - * As data cannot be directly extracted in const pointers, assign these arrays to const pointers before proceeding to - * maximize the reuse of existing code. Const pointers are required because dml2.0 dml_display_cfg_st is const. - * - */ - ODMMode = (const unsigned int *)odm_mode_array; - DPPPerSurface = (const unsigned int *)dpp_per_surface_array; - } else { - ODMMode = (unsigned int *)disp_cfg->hw.ODMMode; - DPPPerSurface = disp_cfg->hw.DPPPerSurface; - } + ODMMode = (unsigned int *)disp_cfg->hw.ODMMode; + DPPPerSurface = disp_cfg->hw.DPPPerSurface; for (stream_index = 0; stream_index < state->stream_count; stream_index++) { memset(&scratch, 0, sizeof(struct dc_pipe_mapping_scratch)); @@ -958,8 +956,8 @@ bool dml2_map_dc_pipes(struct dml2_context *ctx, struct dc_state *state, const s for (plane_index = 0; plane_index < state->stream_status[stream_index].plane_count; plane_index++) { // Planes are ordered top to bottom. 
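The plane_index parameter threaded through the get_plane_id variants in these hunks exists to disambiguate duplicated planes: when the same dc_plane_state pointer occurs twice in a stream's plane list, the pointer comparison alone always matches the first slot, so if duplicates exist the slot index j must also equal the caller's plane_index. A self-contained sketch of that comparison, with types trimmed to the fields the logic needs:

#include <stdbool.h>

struct plane { int dummy; };

struct stream_status {
	int plane_count;
	const struct plane *plane_states[6];
};

static bool lookup_plane_id(bool duplicates_exist, const struct stream_status *ss,
			    unsigned int stream_idx, const struct plane *plane,
			    unsigned int plane_index, unsigned int *plane_id)
{
	for (int j = 0; j < ss->plane_count; j++) {
		/* a pointer match is only trusted when no duplicates exist,
		 * or when it sits in the expected slot */
		if (ss->plane_states[j] == plane &&
		    (!duplicates_exist || j == (int)plane_index)) {
			*plane_id = (stream_idx << 16) | j; /* stream in the high half, slot in the low */
			return true;
		}
	}
	return false;
}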
- if (get_plane_id(state, state->stream_status[stream_index].plane_states[plane_index], - stream_id, &plane_id)) { + if (get_plane_id(ctx, state, state->stream_status[stream_index].plane_states[plane_index], + stream_id, plane_index, &plane_id)) { plane_disp_cfg_index = find_disp_cfg_idx_by_plane_id(mapping, plane_id); // Setup mpc_info for this plane @@ -983,7 +981,8 @@ bool dml2_map_dc_pipes(struct dml2_context *ctx, struct dc_state *state, const s // Clear the pool assignment scratch (which is per plane) memset(&scratch.pipe_pool, 0, sizeof(struct dc_plane_pipe_pool)); - map_pipes_for_plane(ctx, state, state->streams[stream_index], state->stream_status[stream_index].plane_states[plane_index], &scratch, existing_state); + map_pipes_for_plane(ctx, state, state->streams[stream_index], + state->stream_status[stream_index].plane_states[plane_index], plane_index, &scratch, existing_state); } else { // Plane ID cannot be generated, therefore no DML mapping can be performed. ASSERT(false); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h index ed5b767d46e0..1cf8a884c0fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h @@ -75,6 +75,8 @@ struct dml2_dml_to_dc_pipe_mapping { bool dml_pipe_idx_to_stream_id_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; unsigned int dml_pipe_idx_to_plane_id[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; bool dml_pipe_idx_to_plane_id_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + unsigned int dml_pipe_idx_to_plane_index[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + bool dml_pipe_idx_to_plane_index_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; }; struct dml2_wrapper_scratch { @@ -96,6 +98,7 @@ struct dml2_wrapper_scratch { struct dml2_dml_to_dc_pipe_mapping dml_to_dc_pipe_mapping; bool enable_flexible_pipe_mapping; + bool plane_duplicate_exists; }; struct dml2_helper_det_policy_scratch { @@ -104,7 +107,6 @@ struct dml2_helper_det_policy_scratch { enum dml2_architecture { dml2_architecture_20, - dml2_architecture_21 }; struct dml2_context { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 89836f175a13..75171bee6f71 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -231,6 +231,7 @@ void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, s out->num_chans = 4; out->round_trip_ping_latency_dcfclk_cycles = 106; out->smn_latency_us = 2; + out->dispclk_dppclk_vco_speed_mhz = 3600; break; case dml_project_dcn351: @@ -930,10 +931,11 @@ static unsigned int map_stream_to_dml_display_cfg(const struct dml2_context *dml return location; } -static bool get_plane_id(const struct dc_state *context, const struct dc_plane_state *plane, - unsigned int stream_id, unsigned int *plane_id) +static bool get_plane_id(struct dml2_context *dml2, const struct dc_state *context, const struct dc_plane_state *plane, + unsigned int stream_id, unsigned int plane_index, unsigned int *plane_id) { int i, j; + bool is_plane_duplicate = dml2->v20.scratch.plane_duplicate_exists; if (!plane_id) return false; @@ -941,7 +943,8 @@ static bool get_plane_id(const struct dc_state *context, const struct dc_plane_s for (i = 0; i < context->stream_count; i++) { if (context->streams[i]->stream_id == stream_id) { for (j = 0; j < 
context->stream_status[i].plane_count; j++) { - if (context->stream_status[i].plane_states[j] == plane) { + if (context->stream_status[i].plane_states[j] == plane && + (!is_plane_duplicate || (is_plane_duplicate && (j == plane_index)))) { *plane_id = (i << 16) | j; return true; } @@ -953,13 +956,13 @@ static bool get_plane_id(const struct dc_state *context, const struct dc_plane_s } static unsigned int map_plane_to_dml_display_cfg(const struct dml2_context *dml2, const struct dc_plane_state *plane, - const struct dc_state *context, const struct dml_display_cfg_st *dml_dispcfg, unsigned int stream_id) + const struct dc_state *context, const struct dml_display_cfg_st *dml_dispcfg, unsigned int stream_id, int plane_index) { unsigned int plane_id; int i = 0; int location = -1; - if (!get_plane_id(context, plane, stream_id, &plane_id)) { + if (!get_plane_id(context->bw_ctx.dml2, context, plane, stream_id, plane_index, &plane_id)) { ASSERT(false); return -1; } @@ -990,7 +993,41 @@ static void apply_legacy_svp_drr_settings(struct dml2_context *dml2, const struc } } -void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, const struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg) +static void dml2_populate_pipe_to_plane_index_mapping(struct dml2_context *dml2, struct dc_state *state) +{ + unsigned int i; + unsigned int pipe_index = 0; + unsigned int plane_index = 0; + struct dml2_dml_to_dc_pipe_mapping *dml_to_dc_pipe_mapping = &dml2->v20.scratch.dml_to_dc_pipe_mapping; + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index_valid[i] = false; + dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index[i] = 0; + } + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; + + if (!pipe || !pipe->stream || !pipe->plane_state) + continue; + + while (pipe) { + pipe_index = pipe->pipe_idx; + + if (pipe->stream && dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index_valid[pipe_index] == false) { + dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index[pipe_index] = plane_index; + plane_index++; + dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index_valid[pipe_index] = true; + } + + pipe = pipe->bottom_pipe; + } + + plane_index = 0; + } +} + +void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg) { int i = 0, j = 0; int disp_cfg_stream_location, disp_cfg_plane_location; @@ -1007,6 +1044,8 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, const struct d dml_dispcfg->plane.GPUVMMaxPageTableLevels = 4; dml_dispcfg->plane.HostVMEnable = false; + dml2_populate_pipe_to_plane_index_mapping(dml2, context); + for (i = 0; i < context->stream_count; i++) { disp_cfg_stream_location = map_stream_to_dml_display_cfg(dml2, context->streams[i], dml_dispcfg); @@ -1043,7 +1082,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, const struct d } else { for (j = 0; j < context->stream_status[i].plane_count; j++) { disp_cfg_plane_location = map_plane_to_dml_display_cfg(dml2, - context->stream_status[i].plane_states[j], context, dml_dispcfg, context->streams[i]->stream_id); + context->stream_status[i].plane_states[j], context, dml_dispcfg, context->streams[i]->stream_id, j); if (disp_cfg_plane_location < 0) disp_cfg_plane_location = dml_dispcfg->num_surfaces++; @@ -1067,7 +1106,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, const struct d 
dml_dispcfg->plane.BlendingAndTiming[disp_cfg_plane_location] = disp_cfg_stream_location; - if (get_plane_id(context, context->stream_status[i].plane_states[j], context->streams[i]->stream_id, + if (get_plane_id(dml2, context, context->stream_status[i].plane_states[j], context->streams[i]->stream_id, j, &dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location])) dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h index dac6d27b14cd..d764773938f4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h @@ -34,7 +34,7 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, void dml2_translate_ip_params(const struct dc *in_dc, struct ip_params_st *out); void dml2_translate_socbb_params(const struct dc *in_dc, struct soc_bounding_box_st *out); void dml2_translate_soc_states(const struct dc *in_dc, struct soc_states_st *out, int num_states); -void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, const struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg); +void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg); void dml2_update_pipe_ctx_dchub_regs(struct _vcs_dpi_dml_display_rq_regs_st *rq_regs, struct _vcs_dpi_dml_display_dlg_regs_st *disp_dlg_regs, struct _vcs_dpi_dml_display_ttu_regs_st *disp_ttu_regs, struct pipe_ctx *out); bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c index 69fd96f4f3b0..2498b8341199 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c @@ -209,10 +209,11 @@ static int find_dml_pipe_idx_by_plane_id(struct dml2_context *ctx, unsigned int return -1; } -static bool get_plane_id(const struct dc_state *state, const struct dc_plane_state *plane, - unsigned int stream_id, unsigned int *plane_id) +static bool get_plane_id(struct dml2_context *dml2, const struct dc_state *state, const struct dc_plane_state *plane, + unsigned int stream_id, unsigned int plane_index, unsigned int *plane_id) { int i, j; + bool is_plane_duplicate = dml2->v20.scratch.plane_duplicate_exists; if (!plane_id) return false; @@ -220,7 +221,8 @@ static bool get_plane_id(const struct dc_state *state, const struct dc_plane_sta for (i = 0; i < state->stream_count; i++) { if (state->streams[i]->stream_id == stream_id) { for (j = 0; j < state->stream_status[i].plane_count; j++) { - if (state->stream_status[i].plane_states[j] == plane) { + if (state->stream_status[i].plane_states[j] == plane && + (!is_plane_duplicate || (is_plane_duplicate && (j == plane_index)))) { *plane_id = (i << 16) | j; return true; } @@ -304,8 +306,9 @@ void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *cont * there is a need to know which DML pipe index maps to which DC pipe. The code below * finds a dml_pipe_index from the plane id if a plane is valid. If a plane is not valid then * it finds a dml_pipe_index from the stream id. 
*/ - if (get_plane_id(context, context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_state, - context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->stream_id, &plane_id)) { + if (get_plane_id(in_ctx, context, context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_state, + context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->stream_id, + in_ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[context->res_ctx.pipe_ctx[dc_pipe_ctx_index].pipe_idx], &plane_id)) { dml_pipe_idx = find_dml_pipe_idx_by_plane_id(in_ctx, plane_id); } else { dml_pipe_idx = dml2_helper_find_dml_pipe_idx_by_stream_id(in_ctx, context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->stream_id); @@ -445,8 +448,9 @@ bool dml2_verify_det_buffer_configuration(struct dml2_context *in_ctx, struct dc for (i = 0; i < MAX_PIPES; i++) { if (!display_state->res_ctx.pipe_ctx[i].stream) continue; - if (get_plane_id(display_state, display_state->res_ctx.pipe_ctx[i].plane_state, - display_state->res_ctx.pipe_ctx[i].stream->stream_id, &plane_id)) + if (get_plane_id(in_ctx, display_state, display_state->res_ctx.pipe_ctx[i].plane_state, + display_state->res_ctx.pipe_ctx[i].stream->stream_id, + in_ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[display_state->res_ctx.pipe_ctx[i].pipe_idx], &plane_id)) dml_pipe_idx = find_dml_pipe_idx_by_plane_id(in_ctx, plane_id); else dml_pipe_idx = dml2_helper_find_dml_pipe_idx_by_stream_id(in_ctx, display_state->res_ctx.pipe_ctx[i].stream->stream_id); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index 0a06bf3b135a..8f231418870f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -639,7 +639,7 @@ static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_s return result; } -static bool dml2_validate_only(const struct dc_state *context) +static bool dml2_validate_only(struct dc_state *context) { struct dml2_context *dml2 = context->bw_ctx.dml2; unsigned int result = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index 3966845c7694..e8b5f17beb96 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -512,6 +512,11 @@ static bool intersect_dsc_caps( dsc_sink_caps->slice_caps1.bits.NUM_SLICES_4 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_4; dsc_common_caps->slice_caps.bits.NUM_SLICES_8 = dsc_sink_caps->slice_caps1.bits.NUM_SLICES_8 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_8; + dsc_common_caps->slice_caps.bits.NUM_SLICES_12 = + dsc_sink_caps->slice_caps1.bits.NUM_SLICES_12 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_12; + dsc_common_caps->slice_caps.bits.NUM_SLICES_16 = + dsc_sink_caps->slice_caps2.bits.NUM_SLICES_16 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_16; + if (!dsc_common_caps->slice_caps.raw) return false; @@ -703,6 +708,12 @@ static int get_available_dsc_slices(union dsc_enc_slice_caps slice_caps, int *av if (slice_caps.bits.NUM_SLICES_8) available_slices[idx++] = 8; + if (slice_caps.bits.NUM_SLICES_12) + available_slices[idx++] = 12; + + if (slice_caps.bits.NUM_SLICES_16) + available_slices[idx++] = 16; + return idx; } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h index 2fefdf40612d..44b4df6469d1 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h @@ 
-1183,7 +1183,23 @@ struct dce_hwseq_registers { type LONO_FGCG_REP_DIS;\ type LONO_DISPCLK_GATE_DISABLE;\ type LONO_SOCCLK_GATE_DISABLE;\ - type LONO_DMCUBCLK_GATE_DISABLE; + type LONO_DMCUBCLK_GATE_DISABLE;\ + type SYMCLKA_FE_GATE_DISABLE;\ + type SYMCLKB_FE_GATE_DISABLE;\ + type SYMCLKC_FE_GATE_DISABLE;\ + type SYMCLKD_FE_GATE_DISABLE;\ + type SYMCLKE_FE_GATE_DISABLE;\ + type HDMICHARCLK0_GATE_DISABLE;\ + type SYMCLKA_GATE_DISABLE;\ + type SYMCLKB_GATE_DISABLE;\ + type SYMCLKC_GATE_DISABLE;\ + type SYMCLKD_GATE_DISABLE;\ + type SYMCLKE_GATE_DISABLE;\ + type PHYASYMCLK_ROOT_GATE_DISABLE;\ + type PHYBSYMCLK_ROOT_GATE_DISABLE;\ + type PHYCSYMCLK_ROOT_GATE_DISABLE;\ + type PHYDSYMCLK_ROOT_GATE_DISABLE;\ + type PHYESYMCLK_ROOT_GATE_DISABLE; struct dce_hwseq_shift { HWSEQ_REG_FIELD_LIST(uint8_t) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 1b9f21fd4f17..6a65af8c36b9 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -615,12 +615,6 @@ void dcn32_update_force_pstate(struct dc *dc, struct dc_state *context) pipe->stream->fpo_in_use)) { if (hubp && hubp->funcs->hubp_update_force_pstate_disallow) hubp->funcs->hubp_update_force_pstate_disallow(hubp, false); - } - - /* Today only FPO uses cursor P-State force. Only clear cursor P-State force - * if it's not FPO. - */ - if (!pipe->stream || !pipe->stream->fpo_in_use) { if (hubp && hubp->funcs->hubp_update_force_cursor_pstate_disallow) hubp->funcs->hubp_update_force_cursor_pstate_disallow(hubp, false); } @@ -632,17 +626,10 @@ void dcn32_update_force_pstate(struct dc *dc, struct dc_state *context) struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; struct hubp *hubp = pipe->plane_res.hubp; - if (pipe->stream && pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + if (pipe->stream && (pipe->stream->mall_stream_config.type == SUBVP_MAIN || + pipe->stream->fpo_in_use)) { if (hubp && hubp->funcs->hubp_update_force_pstate_disallow) hubp->funcs->hubp_update_force_pstate_disallow(hubp, true); - } - - if (pipe->stream && pipe->stream->fpo_in_use) { - if (hubp && hubp->funcs->hubp_update_force_pstate_disallow) - hubp->funcs->hubp_update_force_pstate_disallow(hubp, true); - /* For now only force cursor p-state disallow for FPO - * Needs to be added for subvp once FW side gets updated - */ if (hubp && hubp->funcs->hubp_update_force_cursor_pstate_disallow) hubp->funcs->hubp_update_force_cursor_pstate_disallow(hubp, true); } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 34737d60b965..5a8258287438 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -138,16 +138,25 @@ void dcn35_init_hw(struct dc *dc) if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); - REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0); - REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0x3F000000); - REG_WRITE(DCCG_GATE_DISABLE_CNTL5, 0x1f7c3fcf); - //dcn35_set_dmu_fgcg(hws, dc->debug.enable_fine_grain_clock_gating.bits.dmu); if (!dcb->funcs->is_accelerated_mode(dcb)) { /*this calls into dmubfw to do the init*/ hws->funcs.bios_golden_init(dc); } + + REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0); + REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0); + + /* Disable gating for PHYASYMCLK. 
Root gating for PHYB-E is disabled here as well; dccg re-enables gating per PHY when needed */ + REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, 1, + PHYBSYMCLK_ROOT_GATE_DISABLE, 1, + PHYCSYMCLK_ROOT_GATE_DISABLE, 1, + PHYDSYMCLK_ROOT_GATE_DISABLE, 1, + PHYESYMCLK_ROOT_GATE_DISABLE, 1); + + REG_WRITE(DCCG_GATE_DISABLE_CNTL5, 0x1f7c3fcf); + // Initialize the dccg if (res_pool->dccg->funcs->dccg_init) res_pool->dccg->funcs->dccg_init(res_pool->dccg); @@ -274,7 +283,19 @@ void dcn35_init_hw(struct dc *dc) if (!dc->debug.disable_clock_gate) { /* enable all DCN clock gating */ REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0); - REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0); + + REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, SYMCLKA_FE_GATE_DISABLE, 0, + SYMCLKB_FE_GATE_DISABLE, 0, + SYMCLKC_FE_GATE_DISABLE, 0, + SYMCLKD_FE_GATE_DISABLE, 0, + SYMCLKE_FE_GATE_DISABLE, 0); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, HDMICHARCLK0_GATE_DISABLE, 0); + REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, SYMCLKA_GATE_DISABLE, 0, + SYMCLKB_GATE_DISABLE, 0, + SYMCLKC_GATE_DISABLE, 0, + SYMCLKD_GATE_DISABLE, 0, + SYMCLKE_GATE_DISABLE, 0); + REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); } @@ -311,6 +332,9 @@ void dcn35_init_hw(struct dc *dc) if (dc->res_pool->pg_cntl) { if (dc->res_pool->pg_cntl->funcs->init_pg_status) dc->res_pool->pg_cntl->funcs->init_pg_status(dc->res_pool->pg_cntl); + + if (dc->res_pool->pg_cntl->funcs->set_force_poweron_domain22) + dc->res_pool->pg_cntl->funcs->set_force_poweron_domain22(dc->res_pool->pg_cntl, false); } } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h index 13f12f2a3f81..ce2f0c0e82bd 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h @@ -141,6 +141,11 @@ struct dccg_funcs { enum physymclk_clock_source clk_src, bool force_enable); + void (*set_physymclk_root_clock_gating)( + struct dccg *dccg, + int phy_inst, + bool enable); + void (*set_dtbclk_dto)( struct dccg *dccg, const struct dtbclk_dto_params *params);
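set_physymclk_root_clock_gating is a new optional entry in dccg_funcs; as the init code above already does for other hooks, callers guard the invocation because implementations that predate the hook leave the pointer NULL. A minimal, freestanding sketch of that calling convention (types trimmed down; the real call sites live in the hwseq/init code):

struct dccg;

struct dccg_funcs {
	void (*set_physymclk_root_clock_gating)(struct dccg *dccg,
						int phy_inst, bool enable);
};

struct dccg { const struct dccg_funcs *funcs; };

static void set_phy_root_gating_if_supported(struct dccg *dccg,
					     int phy_inst, bool enable)
{
	/* optional hook: older dccg implementations leave it NULL */
	if (dccg->funcs->set_physymclk_root_clock_gating)
		dccg->funcs->set_physymclk_root_clock_gating(dccg, phy_inst, enable);
}

diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h index d7b8d586b523..4b27f29d0d80 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h @@ -76,6 +76,8 @@ union dsc_enc_slice_caps { uint8_t NUM_SLICES_3 : 1; /* This one is not per DSC spec, but our encoder supports it */ uint8_t NUM_SLICES_4 : 1; uint8_t NUM_SLICES_8 : 1; + uint8_t NUM_SLICES_12 : 1; + uint8_t NUM_SLICES_16 : 1; } bits; uint8_t raw; }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h new file mode 100644 index 000000000000..9a8bf6ec70ea --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h @@ -0,0 +1,219 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 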
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +/** + * DOC: overview + * + * Output Pipe Timing Combiner (OPTC) includes two major functional blocks: + * Output Data Mapper (ODM) and Output Timing Generator (OTG). + * + * - ODM: the Output Data Mapper block. It can combine input data from + * multiple OPP data pipes into a single data stream, split data from one + * OPP data pipe into multiple data streams, or just bypass OPP data to DIO. + * - OTG: the Output Timing Generator. It generates display timing signals to + * drive the display output. + */ + +#ifndef __DC_OPTC_H__ +#define __DC_OPTC_H__ + +#include "timing_generator.h" + +struct optc { + struct timing_generator base; + + const struct dcn_optc_registers *tg_regs; + const struct dcn_optc_shift *tg_shift; + const struct dcn_optc_mask *tg_mask; + + int opp_count; + + uint32_t max_h_total; + uint32_t max_v_total; + + uint32_t min_h_blank; + + uint32_t min_h_sync_width; + uint32_t min_v_sync_width; + uint32_t min_v_blank; + uint32_t min_v_blank_interlace; + + int vstartup_start; + int vupdate_offset; + int vupdate_width; + int vready_offset; + struct dc_crtc_timing orginal_patched_timing; + enum signal_type signal; +}; +
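struct optc above embeds the generic struct timing_generator as its first member, and the DCN10TG_FROM_TG macro in dcn10_optc.h (see the hunk earlier in this series) recovers the containing optc from a timing_generator pointer via container_of. A freestanding approximation of that embedding pattern, using a local offsetof-based macro and trimmed-down stand-in types instead of the kernel's container_of:

#include <stddef.h>

struct tg { int inst; };     /* stands in for struct timing_generator */

struct optc_sketch {
	struct tg base;      /* generic interface embedded first */
	int opp_count;       /* hw-specific state follows */
};

#define sketch_container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static struct optc_sketch *optc_from_tg(struct tg *tg)
{
	/* works even if base were not the first member */
	return sketch_container_of(tg, struct optc_sketch, base);
}

+struct dcn_otg_state { + uint32_t v_blank_start; + uint32_t v_blank_end; + uint32_t v_sync_a_pol; + uint32_t v_total; + uint32_t v_total_max; + uint32_t v_total_min; + uint32_t v_total_min_sel; + uint32_t v_total_max_sel; + uint32_t v_sync_a_start; + uint32_t v_sync_a_end; + uint32_t h_blank_start; + uint32_t h_blank_end; + uint32_t h_sync_a_start; + uint32_t h_sync_a_end; + uint32_t h_sync_a_pol; + uint32_t h_total; + uint32_t underflow_occurred_status; + uint32_t otg_enabled; + uint32_t blank_enabled; + uint32_t vertical_interrupt1_en; + uint32_t vertical_interrupt1_line; + uint32_t vertical_interrupt2_en; + uint32_t vertical_interrupt2_line; +}; + +void optc1_read_otg_state(struct optc *optc1, struct dcn_otg_state *s); + +bool optc1_get_hw_timing(struct timing_generator *tg, struct dc_crtc_timing *hw_crtc_timing); + +bool optc1_validate_timing(struct timing_generator *optc, + const struct dc_crtc_timing *timing); + +void optc1_program_timing(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + const enum signal_type signal, + bool use_vbios); + +void optc1_setup_vertical_interrupt0(struct timing_generator *optc, + uint32_t start_line, + uint32_t end_line); + +void optc1_setup_vertical_interrupt1(struct timing_generator *optc, + uint32_t start_line); + +void optc1_setup_vertical_interrupt2(struct timing_generator *optc, + uint32_t start_line); + +void optc1_program_global_sync(struct timing_generator *optc, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width); + +bool optc1_disable_crtc(struct timing_generator *optc); + +bool optc1_is_counter_moving(struct timing_generator *optc); + +void optc1_get_position(struct timing_generator *optc, + struct crtc_position *position); + +uint32_t 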
optc1_get_vblank_counter(struct timing_generator *optc); + +void optc1_get_crtc_scanoutpos(struct timing_generator *optc, + uint32_t *v_blank_start, + uint32_t *v_blank_end, + uint32_t *h_position, + uint32_t *v_position); + +void optc1_set_early_control(struct timing_generator *optc, + uint32_t early_cntl); + +void optc1_wait_for_state(struct timing_generator *optc, + enum crtc_state state); + +void optc1_set_blank(struct timing_generator *optc, + bool enable_blanking); + +bool optc1_is_blanked(struct timing_generator *optc); + +void optc1_program_blank_color(struct timing_generator *optc, + const struct tg_color *black_color); + +bool optc1_did_triggered_reset_occur(struct timing_generator *optc); + +void optc1_enable_reset_trigger(struct timing_generator *optc, int source_tg_inst); + +void optc1_disable_reset_trigger(struct timing_generator *optc); + +void optc1_lock(struct timing_generator *optc); + +void optc1_unlock(struct timing_generator *optc); + +void optc1_enable_optc_clock(struct timing_generator *optc, bool enable); + +void optc1_set_drr(struct timing_generator *optc, + const struct drr_params *params); + +void optc1_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max); + +void optc1_set_static_screen_control(struct timing_generator *optc, + uint32_t event_triggers, + uint32_t num_frames); + +void optc1_program_stereo(struct timing_generator *optc, + const struct dc_crtc_timing *timing, + struct crtc_stereo_flags *flags); + +bool optc1_is_stereo_left_eye(struct timing_generator *optc); + +void optc1_clear_optc_underflow(struct timing_generator *optc); + +void optc1_tg_init(struct timing_generator *optc); + +bool optc1_is_tg_enabled(struct timing_generator *optc); + +bool optc1_is_optc_underflow_occurred(struct timing_generator *optc); + +void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable); + +void optc1_set_timing_double_buffer(struct timing_generator *optc, bool enable); + +bool optc1_get_otg_active_size(struct timing_generator *optc, + uint32_t *otg_active_width, + uint32_t *otg_active_height); + +void optc1_enable_crtc_reset(struct timing_generator *optc, + int source_tg_inst, + struct crtc_trigger_info *crtc_tp); + +bool optc1_configure_crc(struct timing_generator *optc, const struct crc_params *params); + +bool optc1_get_crc(struct timing_generator *optc, + uint32_t *r_cr, + uint32_t *g_y, + uint32_t *b_cb); + +bool optc1_is_two_pixels_per_containter(const struct dc_crtc_timing *timing); + +void optc1_set_vtg_params(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing, + bool program_fp2); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h index 00ea3864dd4d..b9812afb886b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h @@ -47,6 +47,8 @@ struct pg_cntl_funcs { void (*optc_pg_control)(struct pg_cntl *pg_cntl, unsigned int optc_inst, bool power_on); void (*dwb_pg_control)(struct pg_cntl *pg_cntl, bool power_on); void (*init_pg_status)(struct pg_cntl *pg_cntl); + + void (*set_force_poweron_domain22)(struct pg_cntl *pg_cntl, bool power_on); }; #endif //__DC_PG_CNTL_H__ diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index 21a39afd274b..2d152b68a501 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ 
b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -53,6 +53,7 @@ static enum dc_link_rate get_link_rate_from_test_link_rate(uint8_t test_rate) return LINK_RATE_UHBR10; case DP_TEST_LINK_RATE_UHBR20: return LINK_RATE_UHBR20; + case DP_TEST_LINK_RATE_UHBR13_5_LEGACY: case DP_TEST_LINK_RATE_UHBR13_5: return LINK_RATE_UHBR13_5; default: @@ -119,6 +120,11 @@ static void dp_test_send_link_training(struct dc_link *link) 1); link_settings.link_rate = get_link_rate_from_test_link_rate(test_rate); + if (link_settings.link_rate == LINK_RATE_UNKNOWN) { + DC_LOG_ERROR("%s: Invalid test link rate.", __func__); + ASSERT(0); + } + /* Set preferred link settings */ link->verified_link_cap.lane_count = link_settings.lane_count; link->verified_link_cap.link_rate = link_settings.link_rate; @@ -457,7 +463,7 @@ static void set_crtc_test_pattern(struct dc_link *link, controller_color_space = pipe_ctx->stream_res.test_pattern_params.color_space; if (controller_color_space == CONTROLLER_DP_COLOR_SPACE_UDEFINED) { - DC_LOG_WARNING("%s: Color space must be defined for test pattern", __func__); + DC_LOG_ERROR("%s: Color space must be defined for test pattern", __func__); ASSERT(0); } @@ -592,6 +598,7 @@ bool dp_set_test_pattern( const unsigned char *p_custom_pattern, unsigned int cust_pattern_size) { + const struct link_hwss *link_hwss; struct pipe_ctx *pipes = link->dc->current_state->res_ctx.pipe_ctx; struct pipe_ctx *pipe_ctx = NULL; unsigned int lane; @@ -828,11 +835,9 @@ bool dp_set_test_pattern( pipe_ctx->stream_res.tg->funcs->lock(pipe_ctx->stream_res.tg); /* update MSA to requested color space */ - pipe_ctx->stream_res.stream_enc->funcs->dp_set_stream_attribute(pipe_ctx->stream_res.stream_enc, - &pipe_ctx->stream->timing, - color_space, - pipe_ctx->stream->use_vsc_sdp_for_colorimetry, - link->dpcd_caps.dprx_feature.bits.SST_SPLIT_SDP_CAP); + link_hwss = get_link_hwss(link, &pipe_ctx->link_res); + pipe_ctx->stream->output_color_space = color_space; + link_hwss->setup_stream_attribute(pipe_ctx); if (pipe_ctx->stream->use_vsc_sdp_for_colorimetry) { if (test_pattern == DP_TEST_PATTERN_COLOR_SQUARES_CEA) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c index 34bf8a9ef738..0c00e94e90b1 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c @@ -184,14 +184,17 @@ static bool handle_hpd_irq_psr_sink(struct dc_link *link) return false; } -static bool handle_hpd_irq_replay_sink(struct dc_link *link) +static void handle_hpd_irq_replay_sink(struct dc_link *link) { union dpcd_replay_configuration replay_configuration; /*AMD Replay version reuse DP_PSR_ERROR_STATUS for REPLAY_ERROR status.*/ union psr_error_status replay_error_status; + if (link->replay_settings.config.force_disable_desync_error_check) + return; + if (!link->replay_settings.replay_feature_enabled) - return false; + return; dm_helpers_dp_read_dpcd( link->ctx, @@ -207,6 +210,9 @@ static bool handle_hpd_irq_replay_sink(struct dc_link *link) &replay_error_status.raw, sizeof(replay_error_status.raw)); + if (replay_configuration.bits.DESYNC_ERROR_STATUS) + link->replay_settings.config.received_desync_error_hpd = 1; + link->replay_settings.config.replay_error_status.bits.LINK_CRC_ERROR = replay_error_status.bits.LINK_CRC_ERROR; link->replay_settings.config.replay_error_status.bits.DESYNC_ERROR = @@ -243,7 +249,6 @@ static bool 
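get_link_rate_from_test_link_rate() above now folds the legacy UHBR13.5 encoding into the same case as the current one and leaves LINK_RATE_UNKNOWN for the caller to log and assert on. A standalone sketch of that lookup shape (the enum values and request bytes here are illustrative, not the actual DPCD encodings):

#include <stdio.h>

enum link_rate { RATE_UNKNOWN = 0, RATE_UHBR10, RATE_UHBR13_5, RATE_UHBR20 };

static enum link_rate rate_from_test_byte(unsigned char test_rate)
{
	switch (test_rate) {
	case 0x01: return RATE_UHBR10;
	case 0x02: return RATE_UHBR20;
	case 0x04: /* legacy encoding falls through to the same rate */
	case 0x03: return RATE_UHBR13_5;
	default:   return RATE_UNKNOWN; /* caller logs an error and asserts */
	}
}

int main(void)
{
	printf("%d\n", rate_from_test_byte(0x07)); /* prints 0: RATE_UNKNOWN */
	return 0;
}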
handle_hpd_irq_replay_sink(struct dc_link *link) edp_set_replay_allow_active(link, &allow_active, true, false, NULL); } } - return true; } void dp_handle_link_loss(struct dc_link *link) @@ -424,9 +429,7 @@ bool dp_handle_hpd_rx_irq(struct dc_link *link, /* PSR-related error was detected and handled */ return true; - if (handle_hpd_irq_replay_sink(link)) - /* Replay-related error was detected and handled */ - return true; + handle_hpd_irq_replay_sink(link); /* If PSR-related error handled, Main link may be off, * so do not handle as a normal sink status change interrupt. diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index bc907ae2052d..ed4379c04715 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -583,6 +583,7 @@ union dmub_fw_boot_status { uint32_t fams_enabled : 1; /**< 1 if VBIOS data is deferred programmed */ uint32_t detection_required: 1; /**< if detection need to be triggered by driver */ uint32_t hw_power_init_done: 1; /**< 1 if hw power init is completed */ + uint32_t ono_regions_enabled: 1; /**< 1 if ONO regions are enabled */ } bits; /**< status bits */ uint32_t all; /**< 32-bit access to status bits */ }; @@ -599,6 +600,7 @@ enum dmub_fw_boot_status_bit { DMUB_FW_BOOT_STATUS_BIT_FAMS_ENABLED = (1 << 5), /**< 1 if FAMS is enabled*/ DMUB_FW_BOOT_STATUS_BIT_DETECTION_REQUIRED = (1 << 6), /**< 1 if detection need to be triggered by driver*/ DMUB_FW_BOOT_STATUS_BIT_HW_POWER_INIT_DONE = (1 << 7), /**< 1 if hw power init is completed */ + DMUB_FW_BOOT_STATUS_BIT_ONO_REGIONS_ENABLED = (1 << 8), /**< 1 if ONO regions are enabled */ }; /* Register bit definition for SCRATCH5 */ @@ -617,9 +619,12 @@ enum dmub_lvtma_status_bit { }; enum dmub_ips_disable_type { - DMUB_IPS_DISABLE_IPS1 = 1, - DMUB_IPS_DISABLE_IPS2 = 2, - DMUB_IPS_DISABLE_IPS2_Z10 = 3, + DMUB_IPS_ENABLE = 0, + DMUB_IPS_DISABLE_ALL = 1, + DMUB_IPS_DISABLE_IPS1 = 2, + DMUB_IPS_DISABLE_IPS2 = 3, + DMUB_IPS_DISABLE_IPS2_Z10 = 4, + DMUB_IPS_DISABLE_DYNAMIC = 5, }; #define DMUB_IPS1_ALLOW_MASK 0x00000001 @@ -653,8 +658,8 @@ union dmub_fw_boot_options { uint32_t disable_clk_ds: 1; /* 1 if disallow dispclk_ds and dppclk_ds*/ uint32_t disable_timeout_recovery : 1; /* 1 if timeout recovery should be disabled */ uint32_t ips_pg_disable: 1; /* 1 to disable ONO domains power gating*/ - uint32_t ips_disable: 2; /* options to disable ips support*/ - uint32_t reserved : 10; /**< reserved */ + uint32_t ips_disable: 3; /* options to disable ips support*/ + uint32_t reserved : 9; /**< reserved */ } bits; /**< boot bits */ uint32_t all; /**< 32-bit access to bits */ }; @@ -2098,7 +2103,7 @@ enum psr_version { /** * PSR not supported. */ - PSR_VERSION_UNSUPPORTED = 0xFFFFFFFF, + PSR_VERSION_UNSUPPORTED = 0xFF, // psr_version field is only 8 bits wide }; /** @@ -3620,7 +3625,6 @@ struct dmub_cmd_abm_pause_data { uint8_t pad[1]; }; - /** * Definition of a DMUB_CMD__ABM_PAUSE command. */ @@ -4046,6 +4050,7 @@ union dmub_rb_cmd { * Definition of a DMUB_CMD__MALL command. */ struct dmub_rb_cmd_mall mall; + /** * Definition of a DMUB_CMD__CAB command. */ @@ -4067,6 +4072,7 @@ union dmub_rb_cmd { * Definition of DMUB_CMD__PANEL_CNTL commands. */ struct dmub_rb_cmd_panel_cntl panel_cntl; + /** * Definition of a DMUB_CMD__ABM_SET_PIPE command. 
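A note on the ips_disable widening above: DMUB_IPS_DISABLE_DYNAMIC = 5 no longer fits in a 2-bit field, and the reserved field has to shrink in step so the union keeps summing to exactly 32 bits and the "all" view of the firmware ABI stays intact. A minimal sketch (the field list is abbreviated; earlier_flags is a stand-in for the preceding flag bits, not the real layout):

#include <stdint.h>

union boot_options_sketch {
	struct {
		uint32_t earlier_flags : 19; /* stand-in for the preceding bits */
		uint32_t ips_pg_disable : 1;
		uint32_t ips_disable : 3;    /* widened from 2 bits */
		uint32_t reserved : 9;       /* shrunk from 10 to compensate */
	} bits;
	uint32_t all;
};

_Static_assert(sizeof(union boot_options_sketch) == sizeof(uint32_t),
	       "boot option bits must still pack into one 32-bit word");

int main(void)
{
	/* The new maximum enum value (5) now fits in the 3-bit field. */
	union boot_options_sketch o = { .bits.ips_disable = 5 };
	return o.bits.ips_disable == 5 ? 0 : 1;
}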
*/ @@ -4470,10 +4476,6 @@ static inline void dmub_rb_flush_pending(const struct dmub_rb *rb) uint64_t *data = (uint64_t *)((uint8_t *)(rb->base_address) + rptr); uint8_t i; - /* Don't remove this. - * The contents need to actually be read from the ring buffer - * for this function to be effective. - */ for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++) (void)READ_ONCE(*data++); @@ -4522,5 +4524,4 @@ static inline void dmub_rb_get_return_data(struct dmub_rb *rb, //============================================================================== //</DMUB_RB>==================================================================== //============================================================================== - #endif /* _DMUB_CMD_H_ */ diff --git a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_2_sh_mask.h index 6e29a185de51..765d9ca2316f 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_2_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_2_sh_mask.h @@ -242,6 +242,34 @@ //MP0_SMN_C2PMSG_103 #define MP0_SMN_C2PMSG_103__CONTENT__SHIFT 0x0 #define MP0_SMN_C2PMSG_103__CONTENT_MASK 0xFFFFFFFFL +//MP0_SMN_C2PMSG_109 +#define MP0_SMN_C2PMSG_109__CONTENT__SHIFT 0x0 +#define MP0_SMN_C2PMSG_109__CONTENT_MASK 0xFFFFFFFFL +//MP0_SMN_C2PMSG_126 +#define MP0_SMN_C2PMSG_126__GPU_ERR_MEM_TRAINING__SHIFT 0x0 +#define MP0_SMN_C2PMSG_126__GPU_ERR_FW_LOAD__SHIFT 0x1 +#define MP0_SMN_C2PMSG_126__GPU_ERR_WAFL_LINK_TRAINING__SHIFT 0x2 +#define MP0_SMN_C2PMSG_126__GPU_ERR_XGMI_LINK_TRAINING__SHIFT 0x3 +#define MP0_SMN_C2PMSG_126__GPU_ERR_USR_CP_LINK_TRAINING__SHIFT 0x4 +#define MP0_SMN_C2PMSG_126__GPU_ERR_USR_DP_LINK_TRAINING__SHIFT 0x5 +#define MP0_SMN_C2PMSG_126__GPU_ERR_HBM_MEM_TEST__SHIFT 0x6 +#define MP0_SMN_C2PMSG_126__GPU_ERR_HBM_BIST_TEST__SHIFT 0x7 +#define MP0_SMN_C2PMSG_126__SOCKET_ID__SHIFT 0x8 +#define MP0_SMN_C2PMSG_126__AID_ID__SHIFT 0xb +#define MP0_SMN_C2PMSG_126__HBM_ID__SHIFT 0xd +#define MP0_SMN_C2PMSG_126__BOOT_STATUS__SHIFT 0x1f +#define MP0_SMN_C2PMSG_126__GPU_ERR_MEM_TRAINING_MASK 0x00000001L +#define MP0_SMN_C2PMSG_126__GPU_ERR_FW_LOAD_MASK 0x00000002L +#define MP0_SMN_C2PMSG_126__GPU_ERR_WAFL_LINK_TRAINING_MASK 0x00000004L +#define MP0_SMN_C2PMSG_126__GPU_ERR_XGMI_LINK_TRAINING_MASK 0x00000008L +#define MP0_SMN_C2PMSG_126__GPU_ERR_USR_CP_LINK_TRAINING_MASK 0x00000010L +#define MP0_SMN_C2PMSG_126__GPU_ERR_USR_DP_LINK_TRAINING_MASK 0x00000020L +#define MP0_SMN_C2PMSG_126__GPU_ERR_HBM_MEM_TEST_MASK 0x00000040L +#define MP0_SMN_C2PMSG_126__GPU_ERR_HBM_BIST_TEST_MASK 0x00000080L +#define MP0_SMN_C2PMSG_126__SOCKET_ID_MASK 0x00000700L +#define MP0_SMN_C2PMSG_126__AID_ID_MASK 0x00001800L +#define MP0_SMN_C2PMSG_126__HBM_ID_MASK 0x00002000L +#define MP0_SMN_C2PMSG_126__BOOT_STATUS_MASK 0x80000000L //MP0_SMN_IH_CREDIT #define MP0_SMN_IH_CREDIT__CREDIT_VALUE__SHIFT 0x0 #define MP0_SMN_IH_CREDIT__CLIENT_ID__SHIFT 0x10 diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 3201808c2dd8..cd3c40a86029 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -1080,33 +1080,35 @@ struct gpu_metrics_v3_0 { uint16_t average_ipu_activity[8]; /* time filtered per-core C0 residency % [0-100]*/ uint16_t average_core_c0_activity[16]; - /* time filtered DRAM read bandwidth [GB/sec] */ + /* time filtered DRAM read bandwidth [MB/sec] */ uint16_t average_dram_reads; - /* time filtered DRAM write bandwidth 
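The new MP0_SMN_C2PMSG_126 definitions above follow the usual SHIFT/MASK convention for packed status words. A small standalone sketch of decoding one such word (the register value is a made-up example; the macros copy the definitions above):

#include <stdint.h>
#include <stdio.h>

#define MP0_SMN_C2PMSG_126__GPU_ERR_FW_LOAD_MASK 0x00000002L
#define MP0_SMN_C2PMSG_126__SOCKET_ID__SHIFT     0x8
#define MP0_SMN_C2PMSG_126__SOCKET_ID_MASK       0x00000700L
#define MP0_SMN_C2PMSG_126__BOOT_STATUS_MASK     0x80000000L

int main(void)
{
	uint32_t val = 0x80000202; /* assumed raw register value */

	/* Mask out the field, then shift it down to its natural position. */
	uint32_t socket = (val & MP0_SMN_C2PMSG_126__SOCKET_ID_MASK) >>
			  MP0_SMN_C2PMSG_126__SOCKET_ID__SHIFT;

	printf("socket %u, fw_load_err=%d, boot_done=%d\n",
	       (unsigned)socket,
	       !!(val & MP0_SMN_C2PMSG_126__GPU_ERR_FW_LOAD_MASK),
	       !!(val & MP0_SMN_C2PMSG_126__BOOT_STATUS_MASK));
	return 0;
}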
[GB/sec] */ + /* time filtered DRAM write bandwidth [MB/sec] */ uint16_t average_dram_writes; /* Driver attached timestamp (in ns) */ uint64_t system_clock_counter; /* Power/Energy */ - /* average dGPU + APU power on A + A platform */ + /* time filtered power used for PPT/STAPM [APU+dGPU] [mW] */ uint32_t average_socket_power; - /* average IPU power [W] */ + /* time filtered IPU power [mW] */ uint16_t average_ipu_power; - /* average APU power [W] */ + /* time filtered APU power [mW] */ uint32_t average_apu_power; - /* average dGPU power [W] */ + /* time filtered GFX power [mW] */ + uint32_t average_gfx_power; + /* time filtered dGPU power [mW] */ uint32_t average_dgpu_power; - /* sum of core power across all cores in the socket [W] */ - uint32_t average_core_power; - /* calculated core power [W] */ - uint16_t core_power[16]; - /* maximum IRM defined STAPM power limit [W] */ + /* time filtered sum of core power across all cores in the socket [mW] */ + uint32_t average_all_core_power; + /* calculated core power [mW] */ + uint16_t average_core_power[16]; + /* maximum IRM defined STAPM power limit [mW] */ uint16_t stapm_power_limit; - /* time filtered STAPM power limit [W] */ + /* time filtered STAPM power limit [mW] */ uint16_t current_stapm_power_limit; - /* Average clocks */ + /* time filtered clocks [MHz] */ uint16_t average_gfxclk_frequency; uint16_t average_socclk_frequency; uint16_t average_vpeclk_frequency; @@ -1115,7 +1117,7 @@ struct gpu_metrics_v3_0 { uint16_t average_vclk_frequency; /* Current clocks */ - /* target core frequency */ + /* target core frequency [MHz] */ uint16_t current_coreclk[16]; /* CCLK frequency limit enforced on classic cores [MHz] */ uint16_t current_core_maxfreq; diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index acf3527fff2d..08cb79401410 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -491,7 +491,7 @@ int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors senso int amdgpu_dpm_get_apu_thermal_limit(struct amdgpu_device *adev, uint32_t *limit) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - int ret = -EINVAL; + int ret = -EOPNOTSUPP; if (pp_funcs && pp_funcs->get_apu_thermal_limit) { mutex_lock(&adev->pm.mutex); @@ -505,7 +505,7 @@ int amdgpu_dpm_get_apu_thermal_limit(struct amdgpu_device *adev, uint32_t *limit int amdgpu_dpm_set_apu_thermal_limit(struct amdgpu_device *adev, uint32_t limit) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - int ret = -EINVAL; + int ret = -EOPNOTSUPP; if (pp_funcs && pp_funcs->set_apu_thermal_limit) { mutex_lock(&adev->pm.mutex); @@ -1182,7 +1182,7 @@ int amdgpu_dpm_get_sclk_od(struct amdgpu_device *adev) int ret = 0; if (!pp_funcs->get_sclk_od) - return 0; + return -EOPNOTSUPP; mutex_lock(&adev->pm.mutex); ret = pp_funcs->get_sclk_od(adev->powerplay.pp_handle); @@ -1196,7 +1196,7 @@ int amdgpu_dpm_set_sclk_od(struct amdgpu_device *adev, uint32_t value) const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; if (is_support_sw_smu(adev)) - return 0; + return -EOPNOTSUPP; mutex_lock(&adev->pm.mutex); if (pp_funcs->set_sclk_od) @@ -1219,7 +1219,7 @@ int amdgpu_dpm_get_mclk_od(struct amdgpu_device *adev) int ret = 0; if (!pp_funcs->get_mclk_od) - return 0; + return -EOPNOTSUPP; mutex_lock(&adev->pm.mutex); ret = pp_funcs->get_mclk_od(adev->powerplay.pp_handle); @@ -1233,7 +1233,7 @@ int amdgpu_dpm_set_mclk_od(struct amdgpu_device *adev, uint32_t value) const struct amd_pm_funcs 
*pp_funcs = adev->powerplay.pp_funcs; if (is_support_sw_smu(adev)) - return 0; + return -EOPNOTSUPP; mutex_lock(&adev->pm.mutex); if (pp_funcs->set_mclk_od) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 517b9fb4624c..ca2ece24e1e0 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -989,12 +989,13 @@ static ssize_t amdgpu_get_pp_features(struct device *dev, * Reading back the files will show you the available power levels within * the power state and the clock information for those levels. If deep sleep is * applied to a clock, the level will be denoted by a special level 'S:' - * E.g., - * S: 19Mhz * - * 0: 615Mhz - * 1: 800Mhz - * 2: 888Mhz - * 3: 1000Mhz + * E.g., :: + * + * S: 19Mhz * + * 0: 615Mhz + * 1: 800Mhz + * 2: 888Mhz + * 3: 1000Mhz * * * To manually adjust these states, first select manual using @@ -2197,6 +2198,22 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ } else if (DEVICE_ATTR_IS(xgmi_plpd_policy)) { if (amdgpu_dpm_get_xgmi_plpd_mode(adev, NULL) == XGMI_PLPD_NONE) *states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(pp_dpm_mclk_od)) { + if (amdgpu_dpm_get_mclk_od(adev) == -EOPNOTSUPP) + *states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(pp_dpm_sclk_od)) { + if (amdgpu_dpm_get_sclk_od(adev) == -EOPNOTSUPP) + *states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(apu_thermal_cap)) { + u32 limit; + + if (amdgpu_dpm_get_apu_thermal_limit(adev, &limit) == + -EOPNOTSUPP) + *states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(pp_dpm_pcie)) { + if (gc_ver == IP_VERSION(9, 4, 2) || + gc_ver == IP_VERSION(9, 4, 3)) + *states = ATTR_STATE_UNSUPPORTED; } switch (gc_ver) { @@ -3288,10 +3305,6 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0); uint32_t tmp; - /* under multi-vf mode, the hwmon attributes are all not supported */ - if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) - return 0; - /* under pp one vf mode manage of hwmon attributes is not supported */ if (amdgpu_sriov_is_pp_one_vf(adev)) effective_mode &= ~S_IWUSR; @@ -4162,6 +4175,7 @@ err_out: int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) { + enum amdgpu_sriov_vf_mode mode; uint32_t mask = 0; int ret; @@ -4173,17 +4187,21 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) if (adev->pm.dpm_enabled == 0) return 0; - adev->pm.int_hwmon_dev = hwmon_device_register_with_groups(adev->dev, - DRIVER_NAME, adev, - hwmon_groups); - if (IS_ERR(adev->pm.int_hwmon_dev)) { - ret = PTR_ERR(adev->pm.int_hwmon_dev); - dev_err(adev->dev, - "Unable to register hwmon device: %d\n", ret); - return ret; + mode = amdgpu_virt_get_sriov_vf_mode(adev); + + /* under multi-vf mode, the hwmon attributes are all not supported */ + if (mode != SRIOV_VF_MODE_MULTI_VF) { + adev->pm.int_hwmon_dev = hwmon_device_register_with_groups(adev->dev, + DRIVER_NAME, adev, + hwmon_groups); + if (IS_ERR(adev->pm.int_hwmon_dev)) { + ret = PTR_ERR(adev->pm.int_hwmon_dev); + dev_err(adev->dev, "Unable to register hwmon device: %d\n", ret); + return ret; + } } - switch (amdgpu_virt_get_sriov_vf_mode(adev)) { + switch (mode) { case SRIOV_VF_MODE_ONE_VF: mask = ATTR_FLAG_ONEVF; break; diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index 9e4f8a4104a3..914c15387157 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ 
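The -EOPNOTSUPP conversions above let the sysfs setup path probe a handler once and hide the attribute when nothing implements it, instead of silently reporting 0. A minimal standalone sketch of that probing pattern (the getter is a stub, not the driver call):

#include <errno.h>
#include <stdio.h>

static int get_sclk_od(void) { return -EOPNOTSUPP; } /* stand-in getter */

int main(void)
{
	/* A real od value is a small non-negative percentage, so the
	 * sentinel is unambiguous. */
	int supported = (get_sclk_od() != -EOPNOTSUPP);

	printf("pp_dpm_sclk_od %s\n", supported ? "visible" : "hidden");
	return 0;
}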
b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -1022,6 +1022,9 @@ static int pp_get_power_limit(void *handle, uint32_t *limit, *limit /= 100; } break; + case PP_PWR_LIMIT_MIN: + *limit = 0; + break; default: ret = -EOPNOTSUPP; break; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h index 9fcad69a9f34..2cf2a7b12623 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h @@ -367,7 +367,7 @@ typedef struct _ATOM_Tonga_VCE_State_Record { typedef struct _ATOM_Tonga_VCE_State_Table { UCHAR ucRevId; UCHAR ucNumEntries; - ATOM_Tonga_VCE_State_Record entries[1]; + ATOM_Tonga_VCE_State_Record entries[]; } ATOM_Tonga_VCE_State_Table; typedef struct _ATOM_Tonga_PowerTune_Table { @@ -481,7 +481,7 @@ typedef struct _ATOM_Tonga_Hard_Limit_Record { typedef struct _ATOM_Tonga_Hard_Limit_Table { UCHAR ucRevId; UCHAR ucNumEntries; - ATOM_Tonga_Hard_Limit_Record entries[1]; + ATOM_Tonga_Hard_Limit_Record entries[]; } ATOM_Tonga_Hard_Limit_Table; typedef struct _ATOM_Tonga_GPIO_Table { diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_pptable.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_pptable.h index 8b0590b834cc..de2926df5ed7 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_pptable.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_pptable.h @@ -129,7 +129,7 @@ typedef struct _ATOM_Vega10_State { typedef struct _ATOM_Vega10_State_Array { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Vega10_State states[1]; /* Dynamically allocate entries. */ + ATOM_Vega10_State states[]; /* Dynamically allocate entries. */ } ATOM_Vega10_State_Array; typedef struct _ATOM_Vega10_CLK_Dependency_Record { @@ -169,37 +169,37 @@ typedef struct _ATOM_Vega10_GFXCLK_Dependency_Table { typedef struct _ATOM_Vega10_MCLK_Dependency_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Vega10_MCLK_Dependency_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Vega10_MCLK_Dependency_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Vega10_MCLK_Dependency_Table; typedef struct _ATOM_Vega10_SOCCLK_Dependency_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Vega10_CLK_Dependency_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Vega10_CLK_Dependency_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Vega10_SOCCLK_Dependency_Table; typedef struct _ATOM_Vega10_DCEFCLK_Dependency_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Vega10_CLK_Dependency_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Vega10_CLK_Dependency_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Vega10_DCEFCLK_Dependency_Table; typedef struct _ATOM_Vega10_PIXCLK_Dependency_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Vega10_CLK_Dependency_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Vega10_CLK_Dependency_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Vega10_PIXCLK_Dependency_Table; typedef struct _ATOM_Vega10_DISPCLK_Dependency_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries.*/ - ATOM_Vega10_CLK_Dependency_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Vega10_CLK_Dependency_Record entries[]; /* Dynamically allocate entries. 
*/ } ATOM_Vega10_DISPCLK_Dependency_Table; typedef struct _ATOM_Vega10_PHYCLK_Dependency_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Vega10_CLK_Dependency_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Vega10_CLK_Dependency_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Vega10_PHYCLK_Dependency_Table; typedef struct _ATOM_Vega10_MM_Dependency_Record { @@ -213,7 +213,7 @@ typedef struct _ATOM_Vega10_MM_Dependency_Record { typedef struct _ATOM_Vega10_MM_Dependency_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries */ - ATOM_Vega10_MM_Dependency_Record entries[1]; /* Dynamically allocate entries */ + ATOM_Vega10_MM_Dependency_Record entries[]; /* Dynamically allocate entries */ } ATOM_Vega10_MM_Dependency_Table; typedef struct _ATOM_Vega10_PCIE_Record { @@ -225,7 +225,7 @@ typedef struct _ATOM_Vega10_PCIE_Record { typedef struct _ATOM_Vega10_PCIE_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries */ - ATOM_Vega10_PCIE_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Vega10_PCIE_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Vega10_PCIE_Table; typedef struct _ATOM_Vega10_Voltage_Lookup_Record { @@ -235,7 +235,7 @@ typedef struct _ATOM_Vega10_Voltage_Lookup_Record { typedef struct _ATOM_Vega10_Voltage_Lookup_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries */ - ATOM_Vega10_Voltage_Lookup_Record entries[1]; /* Dynamically allocate entries */ + ATOM_Vega10_Voltage_Lookup_Record entries[]; /* Dynamically allocate entries */ } ATOM_Vega10_Voltage_Lookup_Table; typedef struct _ATOM_Vega10_Fan_Table { @@ -327,7 +327,7 @@ typedef struct _ATOM_Vega10_VCE_State_Record { typedef struct _ATOM_Vega10_VCE_State_Table { UCHAR ucRevId; UCHAR ucNumEntries; - ATOM_Vega10_VCE_State_Record entries[1]; + ATOM_Vega10_VCE_State_Record entries[]; } ATOM_Vega10_VCE_State_Table; typedef struct _ATOM_Vega10_PowerTune_Table { @@ -427,7 +427,7 @@ typedef struct _ATOM_Vega10_Hard_Limit_Record { typedef struct _ATOM_Vega10_Hard_Limit_Table { UCHAR ucRevId; UCHAR ucNumEntries; - ATOM_Vega10_Hard_Limit_Record entries[1]; + ATOM_Vega10_Hard_Limit_Record entries[]; } ATOM_Vega10_Hard_Limit_Table; typedef struct _Vega10_PPTable_Generic_SubTable_Header { diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 9f86c1fecbb1..1ead323f1c78 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -733,7 +733,7 @@ static int smu_early_init(void *handle) smu->adev = adev; smu->pm_enabled = !!amdgpu_dpm; smu->is_apu = false; - smu->smu_baco.state = SMU_BACO_STATE_EXIT; + smu->smu_baco.state = SMU_BACO_STATE_NONE; smu->smu_baco.platform_support = false; smu->user_dpm_profile.fan_mode = -1; @@ -1711,6 +1711,7 @@ static int smu_disable_dpms(struct smu_context *smu) } if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2) && + !((adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs) && !amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs->stop) adev->gfx.rlc.funcs->stop(adev); @@ -1742,10 +1743,31 @@ static int smu_smc_hw_cleanup(struct smu_context *smu) return 0; } +static int smu_reset_mp1_state(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + int ret = 0; + + if ((!adev->in_runpm) && (!adev->in_suspend) && + (!amdgpu_in_reset(adev))) + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { + case IP_VERSION(13, 0, 0): + case IP_VERSION(13, 0, 7): + case IP_VERSION(13, 0, 10): + ret = 
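These entries[1] to entries[] conversions switch the tables to C99 flexible array members, so sizeof the struct covers only the fixed header and allocations can be sized exactly instead of over- or under-counting by one trailing element. A minimal standalone sketch of the pattern (toy types, not the ATOM tables; in-kernel code would normally use the struct_size() helper for the same arithmetic with overflow checking):

#include <stdio.h>
#include <stdlib.h>

struct record { unsigned char clk, vol; };

struct table {
	unsigned char rev_id;
	unsigned char num_entries;
	struct record entries[]; /* flexible array member */
};

int main(void)
{
	unsigned char n = 8;
	/* sizeof(*t) counts only the header; entries are added explicitly. */
	struct table *t = malloc(sizeof(*t) + n * sizeof(t->entries[0]));

	if (!t)
		return 1;
	t->num_entries = n;
	printf("header %zu bytes + %u entries\n", sizeof(*t), t->num_entries);
	free(t);
	return 0;
}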
smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD); + break; + default: + break; + } + + return ret; +} + static int smu_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct smu_context *smu = adev->powerplay.pp_handle; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; @@ -1763,7 +1785,15 @@ static int smu_hw_fini(void *handle) adev->pm.dpm_enabled = false; - return smu_smc_hw_cleanup(smu); + ret = smu_smc_hw_cleanup(smu); + if (ret) + return ret; + + ret = smu_reset_mp1_state(smu); + if (ret) + return ret; + + return 0; } static void smu_late_fini(void *handle) @@ -2718,7 +2748,7 @@ unlock: static int smu_get_apu_thermal_limit(void *handle, uint32_t *limit) { - int ret = -EINVAL; + int ret = -EOPNOTSUPP; struct smu_context *smu = handle; if (smu->ppt_funcs && smu->ppt_funcs->get_apu_thermal_limit) @@ -2729,7 +2759,7 @@ static int smu_get_apu_thermal_limit(void *handle, uint32_t *limit) static int smu_set_apu_thermal_limit(void *handle, uint32_t limit) { - int ret = -EINVAL; + int ret = -EOPNOTSUPP; struct smu_context *smu = handle; if (smu->ppt_funcs && smu->ppt_funcs->set_apu_thermal_limit) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 839553a86aa2..8def291b18bc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -419,6 +419,7 @@ enum smu_reset_mode { enum smu_baco_state { SMU_BACO_STATE_ENTER = 0, SMU_BACO_STATE_EXIT, + SMU_BACO_STATE_NONE, }; struct smu_baco_context { diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h index b483c8e096e7..22f88842a7fd 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h @@ -150,97 +150,39 @@ typedef struct { } DpmClocks_t; typedef struct { - uint16_t CoreFrequency[16]; //Target core frequency [MHz] - uint16_t CorePower[16]; //CAC calculated core power [W] [Q8.8] - uint16_t CoreTemperature[16]; //TSEN measured core temperature [C] [Q8.8] - uint16_t GfxTemperature; //TSEN measured GFX temperature [C] [Q8.8] - uint16_t SocTemperature; //TSEN measured SOC temperature [C] [Q8.8] - uint16_t StapmOpnLimit; //Maximum IRM defined STAPM power limit [W] [Q8.8] - uint16_t StapmCurrentLimit; //Time filtered STAPM power limit [W] [Q8.8] - uint16_t InfrastructureCpuMaxFreq; //CCLK frequency limit enforced on classic cores [MHz] - uint16_t InfrastructureGfxMaxFreq; //GFXCLK frequency limit enforced on GFX [MHz] - uint16_t SkinTemp; //Maximum skin temperature reported by APU and HS2 chassis sensors [C] [Q8.8] - uint16_t AverageGfxclkFrequency; //Time filtered target GFXCLK frequency [MHz] - uint16_t AverageFclkFrequency; //Time filtered target FCLK frequency [MHz] - uint16_t AverageGfxActivity; //Time filtered GFX busy % [0-100] [Q8.8] - uint16_t AverageSocclkFrequency; //Time filtered target SOCCLK frequency [MHz] - uint16_t AverageVclkFrequency; //Time filtered target VCLK frequency [MHz] - uint16_t AverageVcnActivity; //Time filtered VCN busy % [0-100] [Q8.8] - uint16_t AverageVpeclkFrequency; //Time filtered target VPECLK frequency [MHz] - uint16_t AverageIpuclkFrequency; //Time filtered target IPUCLK frequency [MHz] - uint16_t AverageIpuBusy[8]; //Time filtered IPU per-column busy % [0-100] [Q8.8] - uint16_t AverageDRAMReads; //Time filtered DRAM read bandwidth [GB/sec] [Q8.8] - 
uint16_t AverageDRAMWrites; //Time filtered DRAM write bandwidth [GB/sec] [Q8.8] - uint16_t AverageCoreC0Residency[16]; //Time filtered per-core C0 residency % [0-100] [Q8.8] - uint16_t IpuPower; //Time filtered IPU power [W] [Q8.8] - uint32_t ApuPower; //Time filtered APU power [W] [Q24.8] - uint32_t dGpuPower; //Time filtered dGPU power [W] [Q24.8] - uint32_t AverageSocketPower; //Time filtered power used for PPT/STAPM [APU+dGPU] [W] [Q24.8] - uint32_t AverageCorePower; //Time filtered sum of core power across all cores in the socket [W] [Q24.8] - uint32_t FilterAlphaValue; //Metrics table alpha filter time constant [us] - uint32_t MetricsCounter; //Counter that is incremented on every metrics table update [PM_TIMER cycles] + uint16_t CoreFrequency[16]; //Target core frequency [MHz] + uint16_t CorePower[16]; //CAC calculated core power [mW] + uint16_t CoreTemperature[16]; //TSEN measured core temperature [centi-C] + uint16_t GfxTemperature; //TSEN measured GFX temperature [centi-C] + uint16_t SocTemperature; //TSEN measured SOC temperature [centi-C] + uint16_t StapmOpnLimit; //Maximum IRM defined STAPM power limit [mW] + uint16_t StapmCurrentLimit; //Time filtered STAPM power limit [mW] + uint16_t InfrastructureCpuMaxFreq; //CCLK frequency limit enforced on classic cores [MHz] + uint16_t InfrastructureGfxMaxFreq; //GFXCLK frequency limit enforced on GFX [MHz] + uint16_t SkinTemp; //Maximum skin temperature reported by APU and HS2 chassis sensors [centi-C] + uint16_t GfxclkFrequency; //Time filtered target GFXCLK frequency [MHz] + uint16_t FclkFrequency; //Time filtered target FCLK frequency [MHz] + uint16_t GfxActivity; //Time filtered GFX busy % [0-100] + uint16_t SocclkFrequency; //Time filtered target SOCCLK frequency [MHz] + uint16_t VclkFrequency; //Time filtered target VCLK frequency [MHz] + uint16_t VcnActivity; //Time filtered VCN busy % [0-100] + uint16_t VpeclkFrequency; //Time filtered target VPECLK frequency [MHz] + uint16_t IpuclkFrequency; //Time filtered target IPUCLK frequency [MHz] + uint16_t IpuBusy[8]; //Time filtered IPU per-column busy % [0-100] + uint16_t DRAMReads; //Time filtered DRAM read bandwidth [MB/sec] + uint16_t DRAMWrites; //Time filtered DRAM write bandwidth [MB/sec] + uint16_t CoreC0Residency[16]; //Time filtered per-core C0 residency % [0-100] + uint16_t IpuPower; //Time filtered IPU power [mW] + uint32_t ApuPower; //Time filtered APU power [mW] + uint32_t GfxPower; //Time filtered GFX power [mW] + uint32_t dGpuPower; //Time filtered dGPU power [mW] + uint32_t SocketPower; //Time filtered power used for PPT/STAPM [APU+dGPU] [mW] + uint32_t AllCorePower; //Time filtered sum of core power across all cores in the socket [mW] + uint32_t FilterAlphaValue; //Metrics table alpha filter time constant [us] + uint32_t MetricsCounter; //Counter that is incremented on every metrics table update [PM_TIMER cycles] + uint32_t spare[16]; } SmuMetrics_t; -typedef struct { - uint16_t GfxclkFrequency; //[MHz] - uint16_t SocclkFrequency; //[MHz] - uint16_t VclkFrequency; //[MHz] - uint16_t DclkFrequency; //[MHz] - uint16_t MemclkFrequency; //[MHz] - uint16_t spare; - uint16_t UvdActivity; //[centi] - uint16_t GfxActivity; //[centi] - - uint16_t Voltage[2]; //[mV] indices: VDDCR_VDD, VDDCR_SOC - uint16_t Current[2]; //[mA] indices: VDDCR_VDD, VDDCR_SOC - uint16_t Power[2]; //[mW] indices: VDDCR_VDD, VDDCR_SOC - - uint16_t CoreFrequency[8]; //[MHz] - uint16_t CorePower[8]; //[mW] - uint16_t CoreTemperature[8]; //[centi-Celsius] - uint16_t L3Frequency[2]; //[MHz] - uint16_t 
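The unit change in this metrics table is worth spelling out: the legacy fields carry Q8.8 fixed-point watts (high byte integer watts, low byte 1/256ths), while the new layout reports plain milliwatts. A small sketch with made-up values showing both encodings describing the same 15.5 W reading:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t q8_8_watts = 0x0F80; /* legacy: 15 + 128/256 = 15.5 W */
	uint32_t milliwatts = 15500;  /* new layout: 15.5 W directly */

	double legacy_w = q8_8_watts / 256.0;
	double new_w = milliwatts / 1000.0;

	printf("legacy %.3f W, new %.3f W\n", legacy_w, new_w);
	return 0;
}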
L3Temperature[2]; //[centi-Celsius] - - uint16_t spare2[24]; - - uint16_t GfxTemperature; //[centi-Celsius] - uint16_t SocTemperature; //[centi-Celsius] - uint16_t ThrottlerStatus; - - uint16_t CurrentSocketPower; //[mW] - uint16_t StapmOpnLimit; //[W] - uint16_t StapmCurrentLimit; //[W] - uint32_t ApuPower; //[mW] - uint32_t dGpuPower; //[mW] - - uint16_t VddTdcValue; //[mA] - uint16_t SocTdcValue; //[mA] - uint16_t VddEdcValue; //[mA] - uint16_t SocEdcValue; //[mA] - - uint16_t InfrastructureCpuMaxFreq; //[MHz] - uint16_t InfrastructureGfxMaxFreq; //[MHz] - - uint16_t SkinTemp; - uint16_t DeviceState; - uint16_t CurTemp; //[centi-Celsius] - uint16_t FilterAlphaValue; //[m] - - uint16_t AverageGfxclkFrequency; - uint16_t AverageFclkFrequency; - uint16_t AverageGfxActivity; - uint16_t AverageSocclkFrequency; - uint16_t AverageVclkFrequency; - uint16_t AverageVcnActivity; - uint16_t AverageDRAMReads; //Filtered DF Bandwidth::DRAM Reads - uint16_t AverageDRAMWrites; //Filtered DF Bandwidth::DRAM Writes - uint16_t AverageSocketPower; //Filtered value of CurrentSocketPower - uint16_t AverageCorePower[2]; //Filtered of [sum of CorePower[8] per ccx]) - uint16_t AverageCoreC0Residency[16]; //Filtered of [average C0 residency % per core] - uint16_t spare1; - uint32_t MetricsCounter; //Counts the # of metrics table parameter reads per update to the metrics table, i.e. if the metrics table update happens every 1 second, this value could be up to 1000 if the smu collected metrics data every cycle, or as low as 0 if the smu was asleep the whole time. Reset to 0 after writing. -} SmuMetrics_legacy_t; - //ISP tile definitions typedef enum { TILE_XTILE = 0, //ONO0 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index cc02f979e9e9..95cb919718ae 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -299,5 +299,7 @@ int smu_v13_0_update_pcie_parameters(struct smu_context *smu, uint8_t pcie_gen_cap, uint8_t pcie_width_cap); +int smu_v13_0_disable_pmfw_state(struct smu_context *smu); + #endif #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 3efc6aed28f1..762b31455a0b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -234,24 +234,15 @@ static int vangogh_tables_init(struct smu_context *smu) PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); SMU_TABLE_INIT(tables, SMU_TABLE_ACTIVITY_MONITOR_COEFF, sizeof(DpmActivityMonitorCoeffExt_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, max(sizeof(SmuMetrics_t), sizeof(SmuMetrics_legacy_t)), + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); - if (smu->smc_fw_if_version < 0x3) { - SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_legacy_t), - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); - smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_legacy_t), GFP_KERNEL); - } else { - SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_t), - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); - smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL); - } + smu_table->metrics_table = kzalloc(max(sizeof(SmuMetrics_t), sizeof(SmuMetrics_legacy_t)), GFP_KERNEL); if (!smu_table->metrics_table) goto err0_out; smu_table->metrics_time = 0; - if (smu->smc_fw_version >= 0x043F3E00) - smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_3); - else - smu_table->gpu_metrics_table_size = 
sizeof(struct gpu_metrics_v2_2); + smu_table->gpu_metrics_table_size = max(sizeof(struct gpu_metrics_v2_3), sizeof(struct gpu_metrics_v2_2)); smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); if (!smu_table->gpu_metrics_table) goto err1_out; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index a49e5adf7cc3..cf1b84060bc3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2477,3 +2477,16 @@ int smu_v13_0_update_pcie_parameters(struct smu_context *smu, return 0; } + +int smu_v13_0_disable_pmfw_state(struct smu_context *smu) +{ + int ret; + struct amdgpu_device *adev = smu->adev; + + WREG32_PCIE(MP1_Public | (smnMP1_FIRMWARE_FLAGS & 0xffffffff), 0); + + ret = RREG32_PCIE(MP1_Public | + (smnMP1_FIRMWARE_FLAGS & 0xffffffff)); + + return ret == 0 ? 0 : -EINVAL; +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 34bd99b0e137..82c4e1f1c6f0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -354,12 +354,12 @@ static int smu_v13_0_0_check_powerplay_table(struct smu_context *smu) if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_HARDWAREDC) smu->dc_controlled_by_gpio = true; - if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_BACO || - powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_MACO) + if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_BACO) { smu_baco->platform_support = true; - if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_MACO) - smu_baco->maco_support = true; + if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_MACO) + smu_baco->maco_support = true; + } if (!overdrive_lowerlimits->FeatureCtrlMask || !overdrive_upperlimits->FeatureCtrlMask) @@ -2530,38 +2530,10 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, } } - if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE && - (((smu->adev->pdev->device == 0x744C) && (smu->adev->pdev->revision == 0xC8)) || - ((smu->adev->pdev->device == 0x744C) && (smu->adev->pdev->revision == 0xCC)))) { - ret = smu_cmn_update_table(smu, - SMU_TABLE_ACTIVITY_MONITOR_COEFF, - WORKLOAD_PPLIB_COMPUTE_BIT, - (void *)(&activity_monitor_external), - false); - if (ret) { - dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__); - return ret; - } - - ret = smu_cmn_update_table(smu, - SMU_TABLE_ACTIVITY_MONITOR_COEFF, - WORKLOAD_PPLIB_CUSTOM_BIT, - (void *)(&activity_monitor_external), - true); - if (ret) { - dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__); - return ret; - } - - workload_type = smu_cmn_to_asic_specific_index(smu, - CMN2ASIC_MAPPING_WORKLOAD, - PP_SMC_POWER_PROFILE_CUSTOM); - } else { - /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ - workload_type = smu_cmn_to_asic_specific_index(smu, + /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ + workload_type = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_WORKLOAD, smu->power_profile_mode); - } if (workload_type < 0) return -EINVAL; @@ -2602,14 +2574,20 @@ static int smu_v13_0_0_baco_enter(struct smu_context *smu) static int smu_v13_0_0_baco_exit(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; + int ret; if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) { /* Wait for PMFW handling for the 
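smu_v13_0_disable_pmfw_state() above is a write-then-verify sequence: clear the MP1 firmware-flags register, read it back, and report failure if the clear did not stick. A standalone sketch of the pattern (register access is stubbed with a plain variable here; the driver goes through WREG32_PCIE/RREG32_PCIE):

#include <stdint.h>
#include <errno.h>
#include <stdio.h>

static uint32_t fake_mp1_firmware_flags = 0xdeadbeef; /* stand-in register */

static uint32_t reg_read(void) { return fake_mp1_firmware_flags; }
static void reg_write(uint32_t v) { fake_mp1_firmware_flags = v; }

static int disable_pmfw_state_sketch(void)
{
	reg_write(0); /* request the state change */

	/* Read back and verify it actually took effect. */
	return reg_read() == 0 ? 0 : -EINVAL;
}

int main(void)
{
	printf("result: %d\n", disable_pmfw_state_sketch());
	return 0;
}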
Dstate change */ usleep_range(10000, 11000); - return smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS); + ret = smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS); } else { - return smu_v13_0_baco_exit(smu); + ret = smu_v13_0_baco_exit(smu); } + + if (!ret) + adev->gfx.is_poweron = false; + + return ret; } static bool smu_v13_0_0_is_mode1_reset_supported(struct smu_context *smu) @@ -2794,7 +2772,13 @@ static int smu_v13_0_0_set_mp1_state(struct smu_context *smu, switch (mp1_state) { case PP_MP1_STATE_UNLOAD: - ret = smu_cmn_set_mp1_state(smu, mp1_state); + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_PrepareMp1ForUnload, + 0x55, NULL); + + if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT) + ret = smu_v13_0_disable_pmfw_state(smu); + break; default: /* Ignore others */ diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index f42b48b31927..891605d4975f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -48,6 +48,7 @@ #include "smu_cmn.h" #include "mp/mp_13_0_6_offset.h" #include "mp/mp_13_0_6_sh_mask.h" +#include "umc_v12_0.h" #undef MP1_Public #undef smnMP1_FIRMWARE_FLAGS @@ -94,22 +95,11 @@ MODULE_FIRMWARE("amdgpu/smu_13_0_6.bin"); #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT 0x5 #define LINK_SPEED_MAX 4 -#define SMU_13_0_6_DSCLK_THRESHOLD 100 +#define SMU_13_0_6_DSCLK_THRESHOLD 140 #define MCA_BANK_IPID(_ip, _hwid, _type) \ [AMDGPU_MCA_IP_##_ip] = { .hwid = _hwid, .mcatype = _type, } -enum mca_reg_idx { - MCA_REG_IDX_CONTROL = 0, - MCA_REG_IDX_STATUS = 1, - MCA_REG_IDX_ADDR = 2, - MCA_REG_IDX_MISC0 = 3, - MCA_REG_IDX_CONFIG = 4, - MCA_REG_IDX_IPID = 5, - MCA_REG_IDX_SYND = 6, - MCA_REG_IDX_COUNT = 16, -}; - struct mca_bank_ipid { enum amdgpu_mca_ip ip; uint16_t hwid; @@ -122,7 +112,9 @@ struct mca_ras_info { int *err_code_array; int err_code_count; int (*get_err_count)(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, - enum amdgpu_mca_error_type type, int idx, uint32_t *count); + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count); + bool (*bank_is_valid)(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry); }; #define P2S_TABLE_ID_A 0x50325341 @@ -270,7 +262,7 @@ static int smu_v13_0_6_init_microcode(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; uint32_t p2s_table_id = P2S_TABLE_ID_A; int ret = 0, i, p2stable_count; - char ucode_prefix[30]; + char ucode_prefix[15]; char fw_name[30]; /* No need to load P2S tables in IOV mode */ @@ -2305,7 +2297,7 @@ static int smu_v13_0_6_post_init(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; if (!amdgpu_sriov_vf(adev) && adev->ras_enabled) - return smu_v13_0_6_mca_set_debug_mode(smu, true); + return smu_v13_0_6_mca_set_debug_mode(smu, false); return 0; } @@ -2387,6 +2379,7 @@ static const struct mca_bank_ipid smu_v13_0_6_mca_ipid_table[AMDGPU_MCA_IP_COUNT MCA_BANK_IPID(UMC, 0x96, 0x0), MCA_BANK_IPID(SMU, 0x01, 0x1), MCA_BANK_IPID(MP5, 0x01, 0x2), + MCA_BANK_IPID(PCS_XGMI, 0x50, 0x0), }; static void mca_bank_entry_info_decode(struct mca_bank_entry *entry, struct mca_bank_info *info) @@ -2448,53 +2441,60 @@ static int mca_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_t return 0; } -static int mca_decode_mca_ipid(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, int idx, int *ip) +static int 
mca_decode_ipid_to_hwip(uint64_t val) { const struct mca_bank_ipid *ipid; - uint64_t val; uint16_t hwid, mcatype; - int i, ret; - - ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_IPID, &val); - if (ret) - return ret; + int i; hwid = REG_GET_FIELD(val, MCMP1_IPIDT0, HardwareID); mcatype = REG_GET_FIELD(val, MCMP1_IPIDT0, McaType); - if (hwid) { - for (i = 0; i < ARRAY_SIZE(smu_v13_0_6_mca_ipid_table); i++) { - ipid = &smu_v13_0_6_mca_ipid_table[i]; + for (i = 0; i < ARRAY_SIZE(smu_v13_0_6_mca_ipid_table); i++) { + ipid = &smu_v13_0_6_mca_ipid_table[i]; - if (!ipid->hwid) - continue; + if (!ipid->hwid) + continue; - if (ipid->hwid == hwid && ipid->mcatype == mcatype) { - *ip = i; - return 0; - } - } + if (ipid->hwid == hwid && ipid->mcatype == mcatype) + return i; } - *ip = AMDGPU_MCA_IP_UNKNOW; + return AMDGPU_MCA_IP_UNKNOW; +} + +static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) +{ + uint64_t status0; + + status0 = entry->regs[MCA_REG_IDX_STATUS]; + + if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) { + *count = 0; + return 0; + } + + if (type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(status0)) + *count = 1; + else if (type == AMDGPU_MCA_ERROR_TYPE_CE && umc_v12_0_is_correctable_error(status0)) + *count = 1; return 0; } -static int mca_normal_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, - enum amdgpu_mca_error_type type, int idx, uint32_t *count) +static int mca_pcs_xgmi_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, + uint32_t *count) { - uint64_t status0; - int ret; + u32 ext_error_code; - ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_STATUS, &status0); - if (ret) - return ret; + ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(entry->regs[MCA_REG_IDX_STATUS]); - if (REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) + if (type == AMDGPU_MCA_ERROR_TYPE_UE && ext_error_code == 0) + *count = 1; + else if (type == AMDGPU_MCA_ERROR_TYPE_CE && ext_error_code == 6) *count = 1; - else - *count = 0; return 0; } @@ -2515,70 +2515,41 @@ static bool mca_smu_check_error_code(struct amdgpu_device *adev, const struct mc return false; } -static int mca_mp5_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, - enum amdgpu_mca_error_type type, int idx, uint32_t *count) +static int mca_gfx_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) { - uint64_t status0 = 0, misc0 = 0; - uint32_t errcode; - int ret; - - if (mca_ras->ip != AMDGPU_MCA_IP_MP5) - return -EINVAL; - - ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_STATUS, &status0); - if (ret) - return ret; + uint64_t status0, misc0; + status0 = entry->regs[MCA_REG_IDX_STATUS]; if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) { *count = 0; return 0; } - errcode = REG_GET_FIELD(status0, MCMP1_STATUST0, ErrorCode); - if (!mca_smu_check_error_code(adev, mca_ras, errcode)) - return 0; - if (type == AMDGPU_MCA_ERROR_TYPE_UE && REG_GET_FIELD(status0, MCMP1_STATUST0, UC) == 1 && REG_GET_FIELD(status0, MCMP1_STATUST0, PCC) == 1) { - if (count) - *count = 1; + *count = 1; return 0; - } - - ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_MISC0, &misc0); - if (ret) - return ret; - - if (count) + } else { + misc0 = 
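mca_decode_ipid_to_hwip() above reduces an IPID register to a HardwareID/McaType pair and linear-searches the IPID table for a match. A standalone sketch of that flow (the bit positions are assumed stand-ins for illustration, not the real MCMP1_IPIDT0 layout):

#include <stdint.h>
#include <stdio.h>

struct bank_ipid { uint16_t hwid, mcatype; const char *name; };

static const struct bank_ipid ipid_table[] = {
	{ 0x96, 0x0, "UMC" },
	{ 0x01, 0x1, "SMU" },
	{ 0x01, 0x2, "MP5" },
	{ 0x50, 0x0, "PCS_XGMI" },
};

static const char *decode_ipid(uint64_t ipid)
{
	/* assumed layout: hwid in bits [43:32], mcatype in bits [63:48] */
	uint16_t hwid = (ipid >> 32) & 0xfff;
	uint16_t mcatype = ipid >> 48;

	for (unsigned i = 0; i < sizeof(ipid_table) / sizeof(ipid_table[0]); i++)
		if (ipid_table[i].hwid == hwid && ipid_table[i].mcatype == mcatype)
			return ipid_table[i].name;
	return "UNKNOWN";
}

int main(void)
{
	uint64_t ipid = (uint64_t)0x96 << 32; /* hwid 0x96, mcatype 0 */

	printf("bank ip: %s\n", decode_ipid(ipid)); /* prints UMC */
	return 0;
}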
entry->regs[MCA_REG_IDX_MISC0]; *count = REG_GET_FIELD(misc0, MCMP1_MISC0T0, ErrCnt); + } return 0; } static int mca_smu_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, - enum amdgpu_mca_error_type type, int idx, uint32_t *count) + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) { - uint64_t status0 = 0, misc0 = 0; - uint32_t errcode; - int ret; - - if (mca_ras->ip != AMDGPU_MCA_IP_SMU) - return -EINVAL; - - ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_STATUS, &status0); - if (ret) - return ret; + uint64_t status0, misc0; + status0 = entry->regs[MCA_REG_IDX_STATUS]; if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) { *count = 0; return 0; } - errcode = REG_GET_FIELD(status0, MCMP1_STATUST0, ErrorCode); - if (!mca_smu_check_error_code(adev, mca_ras, errcode)) - return 0; - if (type == AMDGPU_MCA_ERROR_TYPE_UE && REG_GET_FIELD(status0, MCMP1_STATUST0, UC) == 1 && REG_GET_FIELD(status0, MCMP1_STATUST0, PCC) == 1) { @@ -2587,16 +2558,43 @@ static int mca_smu_mca_get_err_count(const struct mca_ras_info *mca_ras, struct return 0; } - ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_MISC0, &misc0); - if (ret) - return ret; - - if (count) - *count = REG_GET_FIELD(misc0, MCMP1_MISC0T0, ErrCnt); + misc0 = entry->regs[MCA_REG_IDX_MISC0]; + *count = REG_GET_FIELD(misc0, MCMP1_MISC0T0, ErrCnt); return 0; } +static bool mca_gfx_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry) +{ + uint32_t instlo; + + instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo); + switch (instlo) { + case 0x36430400: /* SMNAID XCD 0 */ + case 0x38430400: /* SMNAID XCD 1 */ + case 0x40430400: /* SMNXCD XCD 0, NOTE: FIXME: fix this error later */ + return true; + default: + return false; + } + + return false; +}; + +static bool mca_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry) +{ + uint32_t errcode, instlo; + + instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo); + if (instlo != 0x03b30400) + return false; + + errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode); + return mca_smu_check_error_code(adev, mca_ras, errcode); +} + static int sdma_err_codes[] = { CODE_SDMA0, CODE_SDMA1, CODE_SDMA2, CODE_SDMA3 }; static int mmhub_err_codes[] = { CODE_DAGB0, CODE_DAGB0 + 1, CODE_DAGB0 + 2, CODE_DAGB0 + 3, CODE_DAGB0 + 4, /* DAGB0-4 */ @@ -2608,23 +2606,30 @@ static const struct mca_ras_info mca_ras_table[] = { { .blkid = AMDGPU_RAS_BLOCK__UMC, .ip = AMDGPU_MCA_IP_UMC, - .get_err_count = mca_normal_mca_get_err_count, + .get_err_count = mca_umc_mca_get_err_count, }, { .blkid = AMDGPU_RAS_BLOCK__GFX, - .ip = AMDGPU_MCA_IP_MP5, - .get_err_count = mca_mp5_mca_get_err_count, + .ip = AMDGPU_MCA_IP_SMU, + .get_err_count = mca_gfx_mca_get_err_count, + .bank_is_valid = mca_gfx_smu_bank_is_valid, }, { .blkid = AMDGPU_RAS_BLOCK__SDMA, .ip = AMDGPU_MCA_IP_SMU, .err_code_array = sdma_err_codes, .err_code_count = ARRAY_SIZE(sdma_err_codes), .get_err_count = mca_smu_mca_get_err_count, + .bank_is_valid = mca_smu_bank_is_valid, }, { .blkid = AMDGPU_RAS_BLOCK__MMHUB, .ip = AMDGPU_MCA_IP_SMU, .err_code_array = mmhub_err_codes, .err_code_count = ARRAY_SIZE(mmhub_err_codes), .get_err_count = mca_smu_mca_get_err_count, + .bank_is_valid = mca_smu_bank_is_valid, + }, { + .blkid = 
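The error-count helpers above share one shape: a bank is ignored unless STATUS has Val set, a set UC+PCC pair counts as a single uncorrectable error, and otherwise the count comes from the ErrCnt field of MISC0. A simplified standalone sketch of that classification (bit positions are assumed, and the type handling is condensed relative to the driver flow):

#include <stdint.h>
#include <stdio.h>

#define STATUS_VAL (1ull << 63) /* assumed bit positions */
#define STATUS_UC  (1ull << 61)
#define STATUS_PCC (1ull << 57)

static uint32_t count_errors(uint64_t status, uint64_t misc0, int want_ue)
{
	if (!(status & STATUS_VAL))
		return 0; /* bank holds no valid error */

	if ((status & STATUS_UC) && (status & STATUS_PCC))
		return want_ue ? 1 : 0; /* one uncorrectable error */

	/* corrected errors: ErrCnt assumed in MISC0 bits [43:32] */
	return want_ue ? 0 : (uint32_t)((misc0 >> 32) & 0xfff);
}

int main(void)
{
	uint64_t status = STATUS_VAL;       /* valid, correctable bank */
	uint64_t misc0 = (uint64_t)3 << 32; /* ErrCnt = 3 */

	printf("ue=%u ce=%u\n", count_errors(status, misc0, 1),
	       count_errors(status, misc0, 0)); /* prints ue=0 ce=3 */
	return 0;
}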
AMDGPU_RAS_BLOCK__XGMI_WAFL, + .ip = AMDGPU_MCA_IP_PCS_XGMI, + .get_err_count = mca_pcs_xgmi_mca_get_err_count, }, }; @@ -2659,130 +2664,84 @@ static int mca_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_e } static bool mca_bank_is_valid(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras, - enum amdgpu_mca_error_type type, int idx) + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry) { - int ret, ip = AMDGPU_MCA_IP_UNKNOW; - - ret = mca_decode_mca_ipid(adev, type, idx, &ip); - if (ret) - return false; - - if (ip == AMDGPU_MCA_IP_UNKNOW) + if (mca_decode_ipid_to_hwip(entry->regs[MCA_REG_IDX_IPID]) != mca_ras->ip) return false; - return ip == mca_ras->ip; -} + if (mca_ras->bank_is_valid) + return mca_ras->bank_is_valid(mca_ras, adev, type, entry); -static int mca_get_valid_mca_idx(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras, - enum amdgpu_mca_error_type type, - uint32_t mca_cnt, int *idx_array, int idx_array_size) -{ - int i, idx_cnt = 0; - - for (i = 0; i < mca_cnt; i++) { - if (!mca_bank_is_valid(adev, mca_ras, type, i)) - continue; - - if (idx_array) { - if (idx_cnt < idx_array_size) - idx_array[idx_cnt] = i; - else - return -EINVAL; - } - - idx_cnt++; - } - - return idx_cnt; + return true; } -static int __mca_smu_get_error_count(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras, enum amdgpu_mca_error_type type, uint32_t *count) +static int __mca_smu_get_ras_mca_set(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras, + enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set) { - uint32_t result, mca_cnt, total = 0; - int idx_array[16]; - int i, ret, idx_cnt = 0; + struct mca_bank_entry entry; + uint32_t mca_cnt; + int i, ret; ret = mca_get_valid_mca_count(adev, type, &mca_cnt); if (ret) return ret; /* if valid mca bank count is 0, the driver can return 0 directly */ - if (!mca_cnt) { - *count = 0; + if (!mca_cnt) return 0; - } - if (!mca_ras->get_err_count) - return -EINVAL; + for (i = 0; i < mca_cnt; i++) { + memset(&entry, 0, sizeof(entry)); + ret = mca_get_mca_entry(adev, type, i, &entry); + if (ret) + return ret; - idx_cnt = mca_get_valid_mca_idx(adev, mca_ras, type, mca_cnt, idx_array, ARRAY_SIZE(idx_array)); - if (idx_cnt < 0) - return -EINVAL; + if (mca_ras && !mca_bank_is_valid(adev, mca_ras, type, &entry)) + continue; - for (i = 0; i < idx_cnt; i++) { - result = 0; - ret = mca_ras->get_err_count(mca_ras, adev, type, idx_array[i], &result); + ret = amdgpu_mca_bank_set_add_entry(mca_set, &entry); if (ret) return ret; - - total += result; } - *count = total; - return 0; } -static int mca_smu_get_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, uint32_t *count) +static int mca_smu_get_ras_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set) { - const struct mca_ras_info *mca_ras; + const struct mca_ras_info *mca_ras = NULL; - if (!count) + if (!mca_set) return -EINVAL; - mca_ras = mca_get_mca_ras_info(adev, blk); - if (!mca_ras) - return -EOPNOTSUPP; - - return __mca_smu_get_error_count(adev, mca_ras, type, count); -} - -static int __mca_smu_get_ras_mca_idx_array(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras, - enum amdgpu_mca_error_type type, int *idx_array, int *idx_array_size) -{ - uint32_t mca_cnt = 0; - int ret, idx_cnt = 0; - - ret = mca_get_valid_mca_count(adev, type, &mca_cnt); - if (ret) - return ret; - - /* if valid mca bank count is 0, the 
driver can return 0 directly */ - if (!mca_cnt) { - *idx_array_size = 0; - return 0; + if (blk != AMDGPU_RAS_BLOCK_COUNT) { + mca_ras = mca_get_mca_ras_info(adev, blk); + if (!mca_ras) + return -EOPNOTSUPP; } - idx_cnt = mca_get_valid_mca_idx(adev, mca_ras, type, mca_cnt, idx_array, *idx_array_size); - if (idx_cnt < 0) - return -EINVAL; - - *idx_array_size = idx_cnt; - - return 0; + return __mca_smu_get_ras_mca_set(adev, mca_ras, type, mca_set); } -static int mca_smu_get_ras_mca_idx_array(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, int *idx_array, int *idx_array_size) +static int mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, + struct mca_bank_entry *entry, uint32_t *count) { const struct mca_ras_info *mca_ras; + if (!entry || !count) + return -EINVAL; + mca_ras = mca_get_mca_ras_info(adev, blk); if (!mca_ras) return -EOPNOTSUPP; - return __mca_smu_get_ras_mca_idx_array(adev, mca_ras, type, idx_array, idx_array_size); + if (!mca_bank_is_valid(adev, mca_ras, type, entry)) { + *count = 0; + return 0; + } + + return mca_ras->get_err_count(mca_ras, adev, type, entry, count); } static int mca_smu_get_mca_entry(struct amdgpu_device *adev, @@ -2801,10 +2760,10 @@ static const struct amdgpu_mca_smu_funcs smu_v13_0_6_mca_smu_funcs = { .max_ue_count = 12, .max_ce_count = 12, .mca_set_debug_mode = mca_smu_set_debug_mode, - .mca_get_error_count = mca_smu_get_error_count, + .mca_get_ras_mca_set = mca_smu_get_ras_mca_set, + .mca_parse_mca_error_count = mca_smu_parse_mca_error_count, .mca_get_mca_entry = mca_smu_get_mca_entry, .mca_get_valid_mca_count = mca_smu_get_valid_mca_count, - .mca_get_ras_mca_idx_array = mca_smu_get_ras_mca_idx_array, }; static int smu_v13_0_6_select_xgmi_plpd_policy(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index ac0e1cc812bd..81eafed76045 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -346,12 +346,13 @@ static int smu_v13_0_7_check_powerplay_table(struct smu_context *smu) if (powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_HARDWAREDC) smu->dc_controlled_by_gpio = true; - if (powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_BACO || - powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_MACO) + if (powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_BACO) { smu_baco->platform_support = true; - if (smu_baco->platform_support && (BoardTable->HsrEnabled || BoardTable->VddqOffEnabled)) - smu_baco->maco_support = true; + if ((powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_MACO) + && (BoardTable->HsrEnabled || BoardTable->VddqOffEnabled)) + smu_baco->maco_support = true; + } if (!overdrive_lowerlimits->FeatureCtrlMask || !overdrive_upperlimits->FeatureCtrlMask) @@ -2498,7 +2499,13 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu, switch (mp1_state) { case PP_MP1_STATE_UNLOAD: - ret = smu_cmn_set_mp1_state(smu, mp1_state); + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_PrepareMp1ForUnload, + 0x55, NULL); + + if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT) + ret = smu_v13_0_disable_pmfw_state(smu); + break; default: /* Ignore others */ @@ -2524,14 +2531,20 @@ static int smu_v13_0_7_baco_enter(struct smu_context *smu) static int smu_v13_0_7_baco_exit(struct smu_context *smu) { struct amdgpu_device *adev = 
smu->adev; + int ret; if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) { /* Wait for PMFW handling for the Dstate change */ usleep_range(10000, 11000); - return smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS); + ret = smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS); } else { - return smu_v13_0_baco_exit(smu); + ret = smu_v13_0_baco_exit(smu); } + + if (!ret) + adev->gfx.is_poweron = false; + + return ret; } static bool smu_v13_0_7_is_mode1_reset_supported(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index 4ac22f44d160..d8f8ad0e7137 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -57,7 +57,7 @@ int smu_v14_0_init_microcode(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; char fw_name[30]; - char ucode_prefix[30]; + char ucode_prefix[15]; int err = 0; const struct smc_firmware_header_v1_0 *hdr; const struct common_firmware_header *header; @@ -229,6 +229,8 @@ int smu_v14_0_check_fw_version(struct smu_context *smu) smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_2; break; case IP_VERSION(14, 0, 0): + if ((smu->smc_fw_version < 0x5d3a00)) + dev_warn(smu->adev->dev, "The PMFW version(%x) is behind in this BIOS!\n", smu->smc_fw_version); smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_0; break; default: diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c index c36fc10b63c8..03b38c3a9968 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c @@ -156,15 +156,10 @@ static int smu_v14_0_0_init_smc_tables(struct smu_context *smu) PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); SMU_TABLE_INIT(tables, SMU_TABLE_DPMCLOCKS, sizeof(DpmClocks_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); - if (smu->smc_fw_version > 0x5d3500) { - SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_t), - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); - smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL); - } else { - SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_legacy_t), - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); - smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_legacy_t), GFP_KERNEL); - } + SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_t), + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + + smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL); if (!smu_table->metrics_table) goto err0_out; smu_table->metrics_time = 0; @@ -177,10 +172,7 @@ static int smu_v14_0_0_init_smc_tables(struct smu_context *smu) if (!smu_table->watermarks_table) goto err2_out; - if (smu->smc_fw_version > 0x5d3500) - smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v3_0); - else - smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_1); + smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v3_0); smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); if (!smu_table->gpu_metrics_table) goto err3_out; @@ -242,13 +234,13 @@ static int smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu, switch (member) { case METRICS_AVERAGE_GFXCLK: - *value = metrics->AverageGfxclkFrequency; + *value = metrics->GfxclkFrequency; break; case METRICS_AVERAGE_SOCCLK: - *value = metrics->AverageSocclkFrequency; + *value = metrics->SocclkFrequency; break; case METRICS_AVERAGE_VCLK: - *value = 
metrics->AverageVclkFrequency; + *value = metrics->VclkFrequency; break; case METRICS_AVERAGE_DCLK: *value = 0; @@ -257,25 +249,25 @@ static int smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu, *value = 0; break; case METRICS_AVERAGE_FCLK: - *value = metrics->AverageFclkFrequency; + *value = metrics->FclkFrequency; break; case METRICS_AVERAGE_GFXACTIVITY: - *value = metrics->AverageGfxActivity >> 8; + *value = metrics->GfxActivity / 100; break; case METRICS_AVERAGE_VCNACTIVITY: - *value = metrics->AverageVcnActivity >> 8; + *value = metrics->VcnActivity / 100; break; case METRICS_AVERAGE_SOCKETPOWER: case METRICS_CURR_SOCKETPOWER: - *value = (metrics->AverageSocketPower & 0xff00) + - ((metrics->AverageSocketPower & 0xff) * 100 >> 8); + *value = (metrics->SocketPower / 1000 << 8) + + (metrics->SocketPower % 1000 / 10); break; case METRICS_TEMPERATURE_EDGE: - *value = (metrics->GfxTemperature >> 8) * + *value = metrics->GfxTemperature / 100 * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; case METRICS_TEMPERATURE_HOTSPOT: - *value = (metrics->SocTemperature >> 8) * + *value = metrics->SocTemperature / 100 * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; case METRICS_THROTTLER_STATUS: @@ -317,107 +309,6 @@ static int smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu, return ret; } -static int smu_v14_0_0_legacy_get_smu_metrics_data(struct smu_context *smu, - MetricsMember_t member, - uint32_t *value) -{ - struct smu_table_context *smu_table = &smu->smu_table; - - SmuMetrics_legacy_t *metrics = (SmuMetrics_legacy_t *)smu_table->metrics_table; - int ret = 0; - - ret = smu_cmn_get_metrics_table(smu, NULL, false); - if (ret) - return ret; - - switch (member) { - case METRICS_AVERAGE_GFXCLK: - *value = metrics->GfxclkFrequency; - break; - case METRICS_AVERAGE_SOCCLK: - *value = metrics->SocclkFrequency; - break; - case METRICS_AVERAGE_VCLK: - *value = metrics->VclkFrequency; - break; - case METRICS_AVERAGE_DCLK: - *value = metrics->DclkFrequency; - break; - case METRICS_AVERAGE_UCLK: - *value = metrics->MemclkFrequency; - break; - case METRICS_AVERAGE_GFXACTIVITY: - *value = metrics->GfxActivity / 100; - break; - case METRICS_AVERAGE_FCLK: - *value = metrics->AverageFclkFrequency; - break; - case METRICS_AVERAGE_VCNACTIVITY: - *value = metrics->UvdActivity; - break; - case METRICS_AVERAGE_SOCKETPOWER: - *value = (metrics->AverageSocketPower << 8) / 1000; - break; - case METRICS_CURR_SOCKETPOWER: - *value = (metrics->CurrentSocketPower << 8) / 1000; - break; - case METRICS_TEMPERATURE_EDGE: - *value = metrics->GfxTemperature / 100 * - SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; - break; - case METRICS_TEMPERATURE_HOTSPOT: - *value = metrics->SocTemperature / 100 * - SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; - break; - case METRICS_THROTTLER_STATUS: - *value = metrics->ThrottlerStatus; - break; - case METRICS_VOLTAGE_VDDGFX: - *value = metrics->Voltage[0]; - break; - case METRICS_VOLTAGE_VDDSOC: - *value = metrics->Voltage[1]; - break; - case METRICS_SS_APU_SHARE: - /* return the percentage of APU power with respect to APU's power limit. - * percentage is reported, this isn't boost value. Smartshift power - * boost/shift is only when the percentage is more than 100. - */ - if (metrics->StapmOpnLimit > 0) - *value = (metrics->ApuPower * 100) / metrics->StapmOpnLimit; - else - *value = 0; - break; - case METRICS_SS_DGPU_SHARE: - /* return the percentage of dGPU power with respect to dGPU's power limit. - * percentage is reported, this isn't boost value. 
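The new socket-power conversion packs a milliwatt reading into the same <integer watts><fraction> layout the old 8.8 decode produced, except the low byte is now decimal-coded centiwatts rather than a binary fraction. A worked example of the arithmetic in the hunk above:

#include <stdint.h>
#include <stdio.h>

/* Mirror of the expression in the diff: watts in the high byte,
 * hundredths of a watt (0..99) in the low byte. */
static uint32_t pack_socket_power_mw(uint32_t mw)
{
	return (mw / 1000 << 8) + (mw % 1000 / 10);
}

int main(void)
{
	/* 12345 mW -> (12 << 8) + 34 = 0x0c22 */
	printf("0x%04x\n", pack_socket_power_mw(12345));
	return 0;
}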
Smartshift power - * boost/shift is only when the percentage is more than 100. - */ - if ((metrics->dGpuPower > 0) && - (metrics->StapmCurrentLimit > metrics->StapmOpnLimit)) - *value = (metrics->dGpuPower * 100) / - (metrics->StapmCurrentLimit - metrics->StapmOpnLimit); - else - *value = 0; - break; - default: - *value = UINT_MAX; - break; - } - - return ret; -} - -static int smu_v14_0_0_common_get_smu_metrics_data(struct smu_context *smu, - MetricsMember_t member, - uint32_t *value) -{ - if (smu->smc_fw_version > 0x5d3500) - return smu_v14_0_0_get_smu_metrics_data(smu, member, value); - else - return smu_v14_0_0_legacy_get_smu_metrics_data(smu, member, value); -} - static int smu_v14_0_0_read_sensor(struct smu_context *smu, enum amd_pp_sensors sensor, void *data, uint32_t *size) @@ -429,69 +320,69 @@ static int smu_v14_0_0_read_sensor(struct smu_context *smu, switch (sensor) { case AMDGPU_PP_SENSOR_GPU_LOAD: - ret = smu_v14_0_0_common_get_smu_metrics_data(smu, + ret = smu_v14_0_0_get_smu_metrics_data(smu, METRICS_AVERAGE_GFXACTIVITY, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_GPU_AVG_POWER: - ret = smu_v14_0_0_common_get_smu_metrics_data(smu, + ret = smu_v14_0_0_get_smu_metrics_data(smu, METRICS_AVERAGE_SOCKETPOWER, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_GPU_INPUT_POWER: - ret = smu_v14_0_0_common_get_smu_metrics_data(smu, + ret = smu_v14_0_0_get_smu_metrics_data(smu, METRICS_CURR_SOCKETPOWER, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_EDGE_TEMP: - ret = smu_v14_0_0_common_get_smu_metrics_data(smu, + ret = smu_v14_0_0_get_smu_metrics_data(smu, METRICS_TEMPERATURE_EDGE, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: - ret = smu_v14_0_0_common_get_smu_metrics_data(smu, + ret = smu_v14_0_0_get_smu_metrics_data(smu, METRICS_TEMPERATURE_HOTSPOT, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_GFX_MCLK: - ret = smu_v14_0_0_common_get_smu_metrics_data(smu, + ret = smu_v14_0_0_get_smu_metrics_data(smu, METRICS_AVERAGE_UCLK, (uint32_t *)data); *(uint32_t *)data *= 100; *size = 4; break; case AMDGPU_PP_SENSOR_GFX_SCLK: - ret = smu_v14_0_0_common_get_smu_metrics_data(smu, + ret = smu_v14_0_0_get_smu_metrics_data(smu, METRICS_AVERAGE_GFXCLK, (uint32_t *)data); *(uint32_t *)data *= 100; *size = 4; break; case AMDGPU_PP_SENSOR_VDDGFX: - ret = smu_v14_0_0_common_get_smu_metrics_data(smu, + ret = smu_v14_0_0_get_smu_metrics_data(smu, METRICS_VOLTAGE_VDDGFX, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_VDDNB: - ret = smu_v14_0_0_common_get_smu_metrics_data(smu, + ret = smu_v14_0_0_get_smu_metrics_data(smu, METRICS_VOLTAGE_VDDSOC, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_SS_APU_SHARE: - ret = smu_v14_0_0_common_get_smu_metrics_data(smu, + ret = smu_v14_0_0_get_smu_metrics_data(smu, METRICS_SS_APU_SHARE, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_SS_DGPU_SHARE: - ret = smu_v14_0_0_common_get_smu_metrics_data(smu, + ret = smu_v14_0_0_get_smu_metrics_data(smu, METRICS_SS_DGPU_SHARE, (uint32_t *)data); *size = 4; @@ -588,7 +479,7 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu, if (ret) return ret; - smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 1); + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 3, 0); gpu_metrics->temperature_gfx = metrics.GfxTemperature; gpu_metrics->temperature_soc = metrics.SocTemperature; @@ -597,32 +488,33 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu, sizeof(uint16_t) * 16); gpu_metrics->temperature_skin = 
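The SmartShift share cases report power as a percentage of a limit, with the dGPU case using the STAPM headroom (current limit minus OPN limit) as the denominator; values above 100 signal an active boost. A guarded sketch of that computation:

#include <stdint.h>

/* dGPU share as a percentage of the STAPM headroom. Both guards matter:
 * a zero or inverted limit range would otherwise divide by zero or
 * yield a meaningless huge percentage. */
static uint32_t ss_dgpu_share(uint32_t dgpu_power,
			      uint32_t stapm_cur, uint32_t stapm_opn)
{
	if (dgpu_power > 0 && stapm_cur > stapm_opn)
		return dgpu_power * 100 / (stapm_cur - stapm_opn);
	return 0;
}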
metrics.SkinTemp; - gpu_metrics->average_gfx_activity = metrics.AverageGfxActivity; - gpu_metrics->average_vcn_activity = metrics.AverageVcnActivity; + gpu_metrics->average_gfx_activity = metrics.GfxActivity; + gpu_metrics->average_vcn_activity = metrics.VcnActivity; memcpy(&gpu_metrics->average_ipu_activity[0], - &metrics.AverageIpuBusy[0], + &metrics.IpuBusy[0], sizeof(uint16_t) * 8); memcpy(&gpu_metrics->average_core_c0_activity[0], - &metrics.AverageCoreC0Residency[0], + &metrics.CoreC0Residency[0], sizeof(uint16_t) * 16); - gpu_metrics->average_dram_reads = metrics.AverageDRAMReads; - gpu_metrics->average_dram_writes = metrics.AverageDRAMWrites; + gpu_metrics->average_dram_reads = metrics.DRAMReads; + gpu_metrics->average_dram_writes = metrics.DRAMWrites; - gpu_metrics->average_socket_power = metrics.AverageSocketPower; + gpu_metrics->average_socket_power = metrics.SocketPower; gpu_metrics->average_ipu_power = metrics.IpuPower; gpu_metrics->average_apu_power = metrics.ApuPower; + gpu_metrics->average_gfx_power = metrics.GfxPower; gpu_metrics->average_dgpu_power = metrics.dGpuPower; - gpu_metrics->average_core_power = metrics.AverageCorePower; - memcpy(&gpu_metrics->core_power[0], + gpu_metrics->average_all_core_power = metrics.AllCorePower; + memcpy(&gpu_metrics->average_core_power[0], &metrics.CorePower[0], sizeof(uint16_t) * 16); - gpu_metrics->average_gfxclk_frequency = metrics.AverageGfxclkFrequency; - gpu_metrics->average_socclk_frequency = metrics.AverageSocclkFrequency; - gpu_metrics->average_vpeclk_frequency = metrics.AverageVpeclkFrequency; - gpu_metrics->average_fclk_frequency = metrics.AverageFclkFrequency; - gpu_metrics->average_vclk_frequency = metrics.AverageVclkFrequency; - gpu_metrics->average_ipuclk_frequency = metrics.AverageIpuclkFrequency; + gpu_metrics->average_gfxclk_frequency = metrics.GfxclkFrequency; + gpu_metrics->average_socclk_frequency = metrics.SocclkFrequency; + gpu_metrics->average_vpeclk_frequency = metrics.VpeclkFrequency; + gpu_metrics->average_fclk_frequency = metrics.FclkFrequency; + gpu_metrics->average_vclk_frequency = metrics.VclkFrequency; + gpu_metrics->average_ipuclk_frequency = metrics.IpuclkFrequency; memcpy(&gpu_metrics->current_coreclk[0], &metrics.CoreFrequency[0], @@ -638,68 +530,6 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu, return sizeof(struct gpu_metrics_v3_0); } -static ssize_t smu_v14_0_0_get_legacy_gpu_metrics(struct smu_context *smu, - void **table) -{ - struct smu_table_context *smu_table = &smu->smu_table; - struct gpu_metrics_v2_1 *gpu_metrics = - (struct gpu_metrics_v2_1 *)smu_table->gpu_metrics_table; - SmuMetrics_legacy_t metrics; - int ret = 0; - - ret = smu_cmn_get_metrics_table(smu, &metrics, true); - if (ret) - return ret; - - smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 1); - - gpu_metrics->temperature_gfx = metrics.GfxTemperature; - gpu_metrics->temperature_soc = metrics.SocTemperature; - memcpy(&gpu_metrics->temperature_core[0], - &metrics.CoreTemperature[0], - sizeof(uint16_t) * 8); - gpu_metrics->temperature_l3[0] = metrics.L3Temperature[0]; - gpu_metrics->temperature_l3[1] = metrics.L3Temperature[1]; - - gpu_metrics->average_gfx_activity = metrics.GfxActivity; - gpu_metrics->average_mm_activity = metrics.UvdActivity; - - gpu_metrics->average_socket_power = metrics.CurrentSocketPower; - gpu_metrics->average_gfx_power = metrics.Power[0]; - gpu_metrics->average_soc_power = metrics.Power[1]; - memcpy(&gpu_metrics->average_core_power[0], - &metrics.CorePower[0], - sizeof(uint16_t) * 8); - - 
gpu_metrics->average_gfxclk_frequency = metrics.GfxclkFrequency; - gpu_metrics->average_socclk_frequency = metrics.SocclkFrequency; - gpu_metrics->average_uclk_frequency = metrics.MemclkFrequency; - gpu_metrics->average_fclk_frequency = metrics.MemclkFrequency; - gpu_metrics->average_vclk_frequency = metrics.VclkFrequency; - gpu_metrics->average_dclk_frequency = metrics.DclkFrequency; - - memcpy(&gpu_metrics->current_coreclk[0], - &metrics.CoreFrequency[0], - sizeof(uint16_t) * 8); - - gpu_metrics->throttle_status = metrics.ThrottlerStatus; - gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); - - *table = (void *)gpu_metrics; - - return sizeof(struct gpu_metrics_v2_1); -} - -static ssize_t smu_v14_0_0_common_get_gpu_metrics(struct smu_context *smu, - void **table) -{ - - if (smu->smc_fw_version > 0x5d3500) - return smu_v14_0_0_get_gpu_metrics(smu, table); - else - return smu_v14_0_0_get_legacy_gpu_metrics(smu, table); -} - static int smu_v14_0_0_mode2_reset(struct smu_context *smu) { int ret; @@ -928,7 +758,7 @@ static int smu_v14_0_0_get_current_clk_freq(struct smu_context *smu, return -EINVAL; } - return smu_v14_0_0_common_get_smu_metrics_data(smu, member_type, value); + return smu_v14_0_0_get_smu_metrics_data(smu, member_type, value); } static int smu_v14_0_0_get_dpm_level_count(struct smu_context *smu, @@ -1230,7 +1060,7 @@ static const struct pptable_funcs smu_v14_0_0_ppt_funcs = { .read_sensor = smu_v14_0_0_read_sensor, .is_dpm_running = smu_v14_0_0_is_dpm_running, .set_watermarks_table = smu_v14_0_0_set_watermarks_table, - .get_gpu_metrics = smu_v14_0_0_common_get_gpu_metrics, + .get_gpu_metrics = smu_v14_0_0_get_gpu_metrics, .get_enabled_mask = smu_cmn_get_enabled_mask, .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, .set_driver_table_location = smu_v14_0_set_driver_table_location, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 6e57c94379a9..001a5cf09657 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -1004,6 +1004,9 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev) case METRICS_VERSION(2, 4): structure_size = sizeof(struct gpu_metrics_v2_4); break; + case METRICS_VERSION(3, 0): + structure_size = sizeof(struct gpu_metrics_v3_0); + break; default: return; } diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 84148a79414b..c45c07840f64 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -1580,7 +1580,6 @@ static const struct pwm_ops ti_sn_pwm_ops = { .free = ti_sn_pwm_free, .apply = ti_sn_pwm_apply, .get_state = ti_sn_pwm_get_state, - .owner = THIS_MODULE, }; static int ti_sn_pwm_probe(struct auxiliary_device *adev, diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index f7003d1ec5ef..01da6789d044 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -1069,7 +1069,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, fence = drm_syncobj_fence_get(syncobjs[i]); if (!fence || dma_fence_chain_find_seqno(&fence, points[i])) { dma_fence_put(fence); - if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { + if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) { continue; } else { timeout = -EINVAL; diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 6d7ba4d0f130..c4839c67cb0f 
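The drm_syncobj fix above widens the condition for tolerating a not-yet-materialized timeline point: either WAIT_FOR_SUBMIT or WAIT_AVAILABLE means the wait should keep polling rather than fail with -EINVAL, and both can be tested with a single OR'd mask. Sketch, with illustrative flag values:

#include <stdbool.h>
#include <stdint.h>

#define WAIT_FOR_SUBMIT	(1u << 0)	/* illustrative bit values */
#define WAIT_AVAILABLE	(1u << 1)

/* A missing fence for the requested point is acceptable if the caller
 * opted into waiting for submission or for availability. */
static bool may_wait_unsubmitted(uint32_t flags)
{
	return flags & (WAIT_FOR_SUBMIT | WAIT_AVAILABLE);
}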
100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2750,6 +2750,18 @@ static int intel_compute_min_cdclk(struct intel_cdclk_state *cdclk_state) for_each_pipe(dev_priv, pipe) min_cdclk = max(cdclk_state->min_cdclk[pipe], min_cdclk); + /* + * Avoid glk_force_audio_cdclk() causing excessive screen + * blinking when multiple pipes are active by making sure + * CDCLK frequency is always high enough for audio. With a + * single active pipe we can always change CDCLK frequency + * by changing the cd2x divider (see glk_cdclk_table[]) and + * thus a full modeset won't be needed then. + */ + if (IS_GEMINILAKE(dev_priv) && cdclk_state->active_pipes && + !is_power_of_2(cdclk_state->active_pipes)) + min_cdclk = max(2 * 96000, min_cdclk); + if (min_cdclk > dev_priv->display.cdclk.max_cdclk_freq) { drm_dbg_kms(&dev_priv->drm, "required cdclk (%d kHz) exceeds max (%d kHz)\n", diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 1891c0cc187d..2c1034578984 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -430,7 +430,7 @@ static int mtl_max_source_rate(struct intel_dp *intel_dp) enum phy phy = intel_port_to_phy(i915, dig_port->base.port); if (intel_is_c10phy(i915, phy)) - return intel_dp_is_edp(intel_dp) ? 675000 : 810000; + return 810000; return 2000000; } diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 37b0f8529b4f..f64d348a969e 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -58,7 +58,7 @@ struct intel_tc_port { struct delayed_work link_reset_work; int link_refcount; bool legacy_port:1; - char port_name[8]; + const char *port_name; enum tc_port_mode mode; enum tc_port_mode init_mode; enum phy_fia phy_fia; @@ -1875,8 +1875,12 @@ int intel_tc_port_init(struct intel_digital_port *dig_port, bool is_legacy) else tc->phy_ops = &icl_tc_phy_ops; - snprintf(tc->port_name, sizeof(tc->port_name), - "%c/TC#%d", port_name(port), tc_port + 1); + tc->port_name = kasprintf(GFP_KERNEL, "%c/TC#%d", port_name(port), + tc_port + 1); + if (!tc->port_name) { + kfree(tc); + return -ENOMEM; + } mutex_init(&tc->lock); /* TODO: Combine the two works */ @@ -1897,6 +1901,7 @@ void intel_tc_port_cleanup(struct intel_digital_port *dig_port) { intel_tc_port_suspend(dig_port); + kfree(dig_port->tc->port_name); kfree(dig_port->tc); dig_port->tc = NULL; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 9a9ff84c90d7..e38f06a6e56e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -844,6 +844,7 @@ static int set_proto_ctx_sseu(struct drm_i915_file_private *fpriv, if (idx >= pc->num_user_engines) return -EINVAL; + idx = array_index_nospec(idx, pc->num_user_engines); pe = &pc->user_engines[idx]; /* Only render engine supports RPCS configuration. */ diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 1c93e84278a0..15fc8e4703f4 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -195,6 +195,21 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) spin_unlock_irq(&uncore->lock); } +static bool needs_wc_ggtt_mapping(struct drm_i915_private *i915) +{ + /* + * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range + * will be dropped. 
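The GLK clamp above applies only when more than one pipe is active, and because active_pipes is a bitmask that is exactly the "nonzero and not a power of two" test (is_power_of_2() lives in <linux/log2.h>). A userspace sketch of the same bit trick:

#include <stdbool.h>
#include <stdint.h>

/* More than one bit set <=> nonzero and not a power of two. Clearing
 * the lowest set bit with x & (x - 1) leaves a nonzero value only when
 * a second bit exists. */
static bool multiple_pipes_active(uint32_t active_pipes)
{
	return active_pipes && (active_pipes & (active_pipes - 1));
}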
For WC mappings in general we have 64 byte burst + * writes when the WC buffer is flushed, so we can't use it, but have to + * resort to an uncached mapping. The WC issue is easily caught by the + * readback check when writing GTT PTE entries. + */ + if (!IS_GEN9_LP(i915) && GRAPHICS_VER(i915) < 11) + return true; + + return false; +} + static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) { struct intel_uncore *uncore = ggtt->vm.gt->uncore; @@ -202,8 +217,12 @@ static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) /* * Note that as an uncached mmio write, this will flush the * WCB of the writes into the GGTT before it triggers the invalidate. + * + * Only perform this when GGTT is mapped as WC, see ggtt_probe_common(). */ - intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + if (needs_wc_ggtt_mapping(ggtt->vm.i915)) + intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, + GFX_FLSH_CNTL_EN); } static void guc_ggtt_ct_invalidate(struct intel_gt *gt) @@ -1140,17 +1159,11 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915)); phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915); - /* - * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range - * will be dropped. For WC mappings in general we have 64 byte burst - * writes when the WC buffer is flushed, so we can't use it, but have to - * resort to an uncached mapping. The WC issue is easily caught by the - * readback check when writing GTT PTE entries. - */ - if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11) - ggtt->gsm = ioremap(phys_addr, size); - else + if (needs_wc_ggtt_mapping(i915)) ggtt->gsm = ioremap_wc(phys_addr, size); + else + ggtt->gsm = ioremap(phys_addr, size); + if (!ggtt->gsm) { drm_err(&i915->drm, "Failed to map the ggtt page table\n"); return -ENOMEM; diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 8b67abd720be..7090e4be29cb 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -581,19 +581,23 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6) static void rc6_res_reg_init(struct intel_rc6 *rc6) { - memset(rc6->res_reg, INVALID_MMIO_REG.reg, sizeof(rc6->res_reg)); + i915_reg_t res_reg[INTEL_RC6_RES_MAX] = { + [0 ... 
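The "readback check" the comment refers to verifies PTE writes by reading them back through the same mapping, which catches the dropped wide writes that make WC unusable on these platforms. A hedged kernel-style sketch of such a check (illustrative, not the exact i915 code):

/* Write a PTE and read it back through the ioremap()ed GSM; on an
 * affected platform with a WC mapping the readback would not match.
 * Assumes a 64-bit build for readq()/writeq() from <linux/io.h>. */
static bool ggtt_pte_write_ok(u64 __iomem *pte, u64 value)
{
	writeq(value, pte);
	return readq(pte) == value;
}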
INTEL_RC6_RES_MAX - 1] = INVALID_MMIO_REG, + }; switch (rc6_to_gt(rc6)->type) { case GT_MEDIA: - rc6->res_reg[INTEL_RC6_RES_RC6] = MTL_MEDIA_MC6; + res_reg[INTEL_RC6_RES_RC6] = MTL_MEDIA_MC6; break; default: - rc6->res_reg[INTEL_RC6_RES_RC6_LOCKED] = GEN6_GT_GFX_RC6_LOCKED; - rc6->res_reg[INTEL_RC6_RES_RC6] = GEN6_GT_GFX_RC6; - rc6->res_reg[INTEL_RC6_RES_RC6p] = GEN6_GT_GFX_RC6p; - rc6->res_reg[INTEL_RC6_RES_RC6pp] = GEN6_GT_GFX_RC6pp; + res_reg[INTEL_RC6_RES_RC6_LOCKED] = GEN6_GT_GFX_RC6_LOCKED; + res_reg[INTEL_RC6_RES_RC6] = GEN6_GT_GFX_RC6; + res_reg[INTEL_RC6_RES_RC6p] = GEN6_GT_GFX_RC6p; + res_reg[INTEL_RC6_RES_RC6pp] = GEN6_GT_GFX_RC6pp; break; } + + memcpy(rc6->res_reg, res_reg, sizeof(res_reg)); } void intel_rc6_init(struct intel_rc6 *rc6) diff --git a/drivers/gpu/drm/i915/i915_debugfs_params.c b/drivers/gpu/drm/i915/i915_debugfs_params.c index 614bde321589..8bca02025e09 100644 --- a/drivers/gpu/drm/i915/i915_debugfs_params.c +++ b/drivers/gpu/drm/i915/i915_debugfs_params.c @@ -38,10 +38,13 @@ static int i915_param_int_open(struct inode *inode, struct file *file) static int notify_guc(struct drm_i915_private *i915) { - int ret = 0; + struct intel_gt *gt; + int i, ret = 0; - if (intel_uc_uses_guc_submission(&to_gt(i915)->uc)) - ret = intel_guc_global_policies_update(&to_gt(i915)->uc.guc); + for_each_gt(gt, i915, i) { + if (intel_uc_uses_guc_submission(>->uc)) + ret = intel_guc_global_policies_update(>->uc.guc); + } return ret; } diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 2f3ecd7d4804..7b1c8de2f9cb 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4227,11 +4227,8 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data, u32 known_open_flags; int ret; - if (!perf->i915) { - drm_dbg(&perf->i915->drm, - "i915 perf interface not available for this system\n"); + if (!perf->i915) return -ENOTSUPP; - } known_open_flags = I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK | @@ -4607,11 +4604,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, struct i915_oa_reg *regs; int err, id; - if (!perf->i915) { - drm_dbg(&perf->i915->drm, - "i915 perf interface not available for this system\n"); + if (!perf->i915) return -ENOTSUPP; - } if (!perf->metrics_kobj) { drm_dbg(&perf->i915->drm, @@ -4773,11 +4767,8 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, struct i915_oa_config *oa_config; int ret; - if (!perf->i915) { - drm_dbg(&perf->i915->drm, - "i915 perf interface not available for this system\n"); + if (!perf->i915) return -ENOTSUPP; - } if (i915_perf_stream_paranoid && !perfmon_capable()) { drm_dbg(&perf->i915->drm, diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 6492a70e3c39..404b0483bb7c 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -1229,6 +1229,9 @@ int qxl_destroy_monitors_object(struct qxl_device *qdev) if (!qdev->monitors_config_bo) return 0; + kfree(qdev->dumb_heads); + qdev->dumb_heads = NULL; + qdev->monitors_config = NULL; qdev->ram_header->monitors_config = 0; diff --git a/drivers/gpu/drm/radeon/atombios.h b/drivers/gpu/drm/radeon/atombios.h index 8a6621f1e82c..2db40789235c 100644 --- a/drivers/gpu/drm/radeon/atombios.h +++ b/drivers/gpu/drm/radeon/atombios.h @@ -3893,7 +3893,7 @@ typedef struct _ATOM_GPIO_PIN_ASSIGNMENT typedef struct _ATOM_GPIO_PIN_LUT { ATOM_COMMON_TABLE_HEADER sHeader; - ATOM_GPIO_PIN_ASSIGNMENT asGPIO_Pin[1]; + ATOM_GPIO_PIN_ASSIGNMENT 
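The old rc6_res_reg_init() used memset() with INVALID_MMIO_REG.reg as the fill value, but memset() replicates a single byte, so the result is only correct when every byte of the sentinel happens to be identical. The designated-initializer array in the new code is type-correct for any sentinel. A compact illustration (GNU C range designators, as the kernel uses):

#include <string.h>
#include <stdint.h>

typedef struct { uint32_t reg; } reg_t;	/* stand-in for i915_reg_t */
#define NUM_RES 4

static void init_regs(reg_t out[NUM_RES], reg_t sentinel)
{
	/* memset(out, sentinel.reg, sizeof(...)) would write only the low
	 * byte of .reg into every byte; the array form assigns whole
	 * structs. */
	reg_t tmp[NUM_RES] = { [0 ... NUM_RES - 1] = sentinel };
	memcpy(out, tmp, sizeof(tmp));
}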
asGPIO_Pin[]; }ATOM_GPIO_PIN_LUT; /****************************************************************************/ @@ -4061,7 +4061,7 @@ typedef struct _ATOM_SRC_DST_TABLE_FOR_ONE_OBJECT //usSrcDstTableOffset UCHAR ucNumberOfSrc; USHORT usSrcObjectID[1]; UCHAR ucNumberOfDst; - USHORT usDstObjectID[1]; + USHORT usDstObjectID[]; }ATOM_SRC_DST_TABLE_FOR_ONE_OBJECT; @@ -4233,7 +4233,7 @@ typedef struct _ATOM_CONNECTOR_DEVICE_TAG_RECORD ATOM_COMMON_RECORD_HEADER sheader; UCHAR ucNumberOfDevice; UCHAR ucReserved; - ATOM_CONNECTOR_DEVICE_TAG asDeviceTag[1]; //This Id is same as "ATOM_DEVICE_XXX_SUPPORT", 1 is only for allocation + ATOM_CONNECTOR_DEVICE_TAG asDeviceTag[]; //This Id is same as "ATOM_DEVICE_XXX_SUPPORT", 1 is only for allocation }ATOM_CONNECTOR_DEVICE_TAG_RECORD; @@ -4293,7 +4293,7 @@ typedef struct _ATOM_OBJECT_GPIO_CNTL_RECORD ATOM_COMMON_RECORD_HEADER sheader; UCHAR ucFlags; // Future expnadibility UCHAR ucNumberOfPins; // Number of GPIO pins used to control the object - ATOM_GPIO_PIN_CONTROL_PAIR asGpio[1]; // the real gpio pin pair determined by number of pins ucNumberOfPins + ATOM_GPIO_PIN_CONTROL_PAIR asGpio[]; // the real gpio pin pair determined by number of pins ucNumberOfPins }ATOM_OBJECT_GPIO_CNTL_RECORD; //Definitions for GPIO pin state @@ -4444,7 +4444,7 @@ typedef struct _ATOM_BRACKET_LAYOUT_RECORD UCHAR ucWidth; UCHAR ucConnNum; UCHAR ucReserved; - ATOM_CONNECTOR_LAYOUT_INFO asConnInfo[1]; + ATOM_CONNECTOR_LAYOUT_INFO asConnInfo[]; }ATOM_BRACKET_LAYOUT_RECORD; /****************************************************************************/ @@ -4600,7 +4600,7 @@ typedef struct _ATOM_I2C_VOLTAGE_OBJECT_V3 UCHAR ucVoltageControlAddress; UCHAR ucVoltageControlOffset; ULONG ulReserved; - VOLTAGE_LUT_ENTRY asVolI2cLut[1]; // end with 0xff + VOLTAGE_LUT_ENTRY asVolI2cLut[]; // end with 0xff }ATOM_I2C_VOLTAGE_OBJECT_V3; // ATOM_I2C_VOLTAGE_OBJECT_V3.ucVoltageControlFlag @@ -4625,7 +4625,7 @@ typedef struct _ATOM_LEAKAGE_VOLTAGE_OBJECT_V3 UCHAR ucLeakageEntryNum; // indicate the entry number of LeakageId/Voltage Lut table UCHAR ucReserved[2]; ULONG ulMaxVoltageLevel; - LEAKAGE_VOLTAGE_LUT_ENTRY_V2 asLeakageIdLut[1]; + LEAKAGE_VOLTAGE_LUT_ENTRY_V2 asLeakageIdLut[]; }ATOM_LEAKAGE_VOLTAGE_OBJECT_V3; @@ -4753,7 +4753,7 @@ typedef struct _ATOM_POWER_SOURCE_INFO { ATOM_COMMON_TABLE_HEADER asHeader; UCHAR asPwrbehave[16]; - ATOM_POWER_SOURCE_OBJECT asPwrObj[1]; + ATOM_POWER_SOURCE_OBJECT asPwrObj[]; }ATOM_POWER_SOURCE_INFO; @@ -5440,7 +5440,7 @@ typedef struct _ATOM_FUSION_SYSTEM_INFO_V2 typedef struct _ATOM_I2C_DATA_RECORD { UCHAR ucNunberOfBytes; //Indicates how many bytes SW needs to write to the external ASIC for one block, besides to "Start" and "Stop" - UCHAR ucI2CData[1]; //I2C data in bytes, should be less than 16 bytes usually + UCHAR ucI2CData[]; //I2C data in bytes, should be less than 16 bytes usually }ATOM_I2C_DATA_RECORD; @@ -5451,14 +5451,14 @@ typedef struct _ATOM_I2C_DEVICE_SETUP_INFO UCHAR ucSSChipID; //SS chip being used UCHAR ucSSChipSlaveAddr; //Slave Address to set up this SS chip UCHAR ucNumOfI2CDataRecords; //number of data block - ATOM_I2C_DATA_RECORD asI2CData[1]; + ATOM_I2C_DATA_RECORD asI2CData[]; }ATOM_I2C_DEVICE_SETUP_INFO; //========================================================================================== typedef struct _ATOM_ASIC_MVDD_INFO { ATOM_COMMON_TABLE_HEADER sHeader; - ATOM_I2C_DEVICE_SETUP_INFO asI2CSetup[1]; + ATOM_I2C_DEVICE_SETUP_INFO asI2CSetup[]; }ATOM_ASIC_MVDD_INFO; 
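These atombios.h changes convert fake one-element arrays into C99 flexible array members, so the compiler (and fortified memory helpers) know the trailing array's length is determined at runtime rather than being 1. A minimal sketch of the pattern and its allocation:

#include <stdlib.h>
#include <stdint.h>

struct pin_lut {
	uint16_t header;	/* stand-in for ATOM_COMMON_TABLE_HEADER */
	uint32_t pins[];	/* flexible array member, formerly pins[1] */
};

/* The allocation now states its intent: header plus n entries. In the
 * kernel, struct_size(lut, pins, n) from <linux/overflow.h> does this
 * computation with overflow checking. */
static struct pin_lut *alloc_pin_lut(size_t n)
{
	return malloc(sizeof(struct pin_lut) + n * sizeof(uint32_t));
}

Note that with the old [1] declaration, sizeof the struct included one bogus element, which is why such conversions occasionally require auditing the size calculations at allocation sites.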
//========================================================================================== @@ -5520,7 +5520,7 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO typedef struct _ATOM_ASIC_INTERNAL_SS_INFO_V2 { ATOM_COMMON_TABLE_HEADER sHeader; - ATOM_ASIC_SS_ASSIGNMENT_V2 asSpreadSpectrum[1]; //this is point only. + ATOM_ASIC_SS_ASSIGNMENT_V2 asSpreadSpectrum[]; //this is pointer only. }ATOM_ASIC_INTERNAL_SS_INFO_V2; typedef struct _ATOM_ASIC_SS_ASSIGNMENT_V3 @@ -5542,7 +5542,7 @@ typedef struct _ATOM_ASIC_SS_ASSIGNMENT_V3 typedef struct _ATOM_ASIC_INTERNAL_SS_INFO_V3 { ATOM_COMMON_TABLE_HEADER sHeader; - ATOM_ASIC_SS_ASSIGNMENT_V3 asSpreadSpectrum[1]; //this is pointer only. + ATOM_ASIC_SS_ASSIGNMENT_V3 asSpreadSpectrum[]; //this is pointer only. }ATOM_ASIC_INTERNAL_SS_INFO_V3; @@ -6282,7 +6282,7 @@ typedef union _ATOM_MEMORY_SETTING_ID_CONFIG_ACCESS typedef struct _ATOM_MEMORY_SETTING_DATA_BLOCK{ ATOM_MEMORY_SETTING_ID_CONFIG_ACCESS ulMemoryID; - ULONG aulMemData[1]; + ULONG aulMemData[]; }ATOM_MEMORY_SETTING_DATA_BLOCK; @@ -7092,7 +7092,7 @@ typedef struct _ATOM_DISP_OUT_INFO_V3 UCHAR ucCoreRefClkSource; // value of CORE_REF_CLK_SOURCE UCHAR ucDispCaps; UCHAR ucReserved[2]; - ASIC_TRANSMITTER_INFO_V2 asTransmitterInfo[1]; // for alligment only + ASIC_TRANSMITTER_INFO_V2 asTransmitterInfo[]; // for alignment only }ATOM_DISP_OUT_INFO_V3; //ucDispCaps @@ -7324,12 +7324,12 @@ typedef struct _CLOCK_CONDITION_SETTING_ENTRY{ USHORT usMaxClockFreq; UCHAR ucEncodeMode; UCHAR ucPhySel; - ULONG ulAnalogSetting[1]; + ULONG ulAnalogSetting[]; }CLOCK_CONDITION_SETTING_ENTRY; typedef struct _CLOCK_CONDITION_SETTING_INFO{ USHORT usEntrySize; - CLOCK_CONDITION_SETTING_ENTRY asClkCondSettingEntry[1]; + CLOCK_CONDITION_SETTING_ENTRY asClkCondSettingEntry[]; }CLOCK_CONDITION_SETTING_INFO; typedef struct _PHY_CONDITION_REG_VAL{ @@ -7346,27 +7346,27 @@ typedef struct _PHY_CONDITION_REG_VAL_V2{ typedef struct _PHY_CONDITION_REG_INFO{ USHORT usRegIndex; USHORT usSize; - PHY_CONDITION_REG_VAL asRegVal[1]; + PHY_CONDITION_REG_VAL asRegVal[]; }PHY_CONDITION_REG_INFO; typedef struct _PHY_CONDITION_REG_INFO_V2{ USHORT usRegIndex; USHORT usSize; - PHY_CONDITION_REG_VAL_V2 asRegVal[1]; + PHY_CONDITION_REG_VAL_V2 asRegVal[]; }PHY_CONDITION_REG_INFO_V2; typedef struct _PHY_ANALOG_SETTING_INFO{ UCHAR ucEncodeMode; UCHAR ucPhySel; USHORT usSize; - PHY_CONDITION_REG_INFO asAnalogSetting[1]; + PHY_CONDITION_REG_INFO asAnalogSetting[]; }PHY_ANALOG_SETTING_INFO; typedef struct _PHY_ANALOG_SETTING_INFO_V2{ UCHAR ucEncodeMode; UCHAR ucPhySel; USHORT usSize; - PHY_CONDITION_REG_INFO_V2 asAnalogSetting[1]; + PHY_CONDITION_REG_INFO_V2 asAnalogSetting[]; }PHY_ANALOG_SETTING_INFO_V2; typedef struct _GFX_HAVESTING_PARAMETERS { diff --git a/drivers/gpu/drm/renesas/shmobile/Kconfig b/drivers/gpu/drm/renesas/shmobile/Kconfig index ad14112999ad..027220b8fe1c 100644 --- a/drivers/gpu/drm/renesas/shmobile/Kconfig +++ b/drivers/gpu/drm/renesas/shmobile/Kconfig @@ -1,11 +1,12 @@ # SPDX-License-Identifier: GPL-2.0 config DRM_SHMOBILE tristate "DRM Support for SH Mobile" - depends on DRM + depends on DRM && PM depends on ARCH_RENESAS || ARCH_SHMOBILE || COMPILE_TEST select BACKLIGHT_CLASS_DEVICE select DRM_KMS_HELPER select DRM_GEM_DMA_HELPER + select VIDEOMODE_HELPERS help Choose this option if you have an SH Mobile chipset. If M is selected the module will be called shmob-drm.
diff --git a/drivers/gpu/drm/renesas/shmobile/Makefile b/drivers/gpu/drm/renesas/shmobile/Makefile index 861edafed856..2679555d61a7 100644 --- a/drivers/gpu/drm/renesas/shmobile/Makefile +++ b/drivers/gpu/drm/renesas/shmobile/Makefile @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -shmob-drm-y := shmob_drm_backlight.o \ - shmob_drm_crtc.o \ +shmob-drm-y := shmob_drm_crtc.o \ shmob_drm_drv.o \ shmob_drm_kms.o \ shmob_drm_plane.o diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_backlight.c b/drivers/gpu/drm/renesas/shmobile/shmob_drm_backlight.c deleted file mode 100644 index 794573badfe8..000000000000 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_backlight.c +++ /dev/null @@ -1,82 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * shmob_drm_backlight.c -- SH Mobile DRM Backlight - * - * Copyright (C) 2012 Renesas Electronics Corporation - * - * Laurent Pinchart (laurent.pinchart@ideasonboard.com) - */ - -#include <linux/backlight.h> - -#include "shmob_drm_backlight.h" -#include "shmob_drm_crtc.h" -#include "shmob_drm_drv.h" - -static int shmob_drm_backlight_update(struct backlight_device *bdev) -{ - struct shmob_drm_connector *scon = bl_get_data(bdev); - struct shmob_drm_device *sdev = scon->connector.dev->dev_private; - const struct shmob_drm_backlight_data *bdata = &sdev->pdata->backlight; - int brightness = backlight_get_brightness(bdev); - - return bdata->set_brightness(brightness); -} - -static int shmob_drm_backlight_get_brightness(struct backlight_device *bdev) -{ - struct shmob_drm_connector *scon = bl_get_data(bdev); - struct shmob_drm_device *sdev = scon->connector.dev->dev_private; - const struct shmob_drm_backlight_data *bdata = &sdev->pdata->backlight; - - return bdata->get_brightness(); -} - -static const struct backlight_ops shmob_drm_backlight_ops = { - .options = BL_CORE_SUSPENDRESUME, - .update_status = shmob_drm_backlight_update, - .get_brightness = shmob_drm_backlight_get_brightness, -}; - -void shmob_drm_backlight_dpms(struct shmob_drm_connector *scon, int mode) -{ - if (scon->backlight == NULL) - return; - - scon->backlight->props.power = mode == DRM_MODE_DPMS_ON - ? 
FB_BLANK_UNBLANK : FB_BLANK_POWERDOWN; - backlight_update_status(scon->backlight); -} - -int shmob_drm_backlight_init(struct shmob_drm_connector *scon) -{ - struct shmob_drm_device *sdev = scon->connector.dev->dev_private; - const struct shmob_drm_backlight_data *bdata = &sdev->pdata->backlight; - struct drm_connector *connector = &scon->connector; - struct drm_device *dev = connector->dev; - struct backlight_device *backlight; - - if (!bdata->max_brightness) - return 0; - - backlight = backlight_device_register(bdata->name, dev->dev, scon, - &shmob_drm_backlight_ops, NULL); - if (IS_ERR(backlight)) { - dev_err(dev->dev, "unable to register backlight device: %ld\n", - PTR_ERR(backlight)); - return PTR_ERR(backlight); - } - - backlight->props.max_brightness = bdata->max_brightness; - backlight->props.brightness = bdata->max_brightness; - backlight->props.power = FB_BLANK_POWERDOWN; - backlight_update_status(backlight); - - scon->backlight = backlight; - return 0; -} - -void shmob_drm_backlight_exit(struct shmob_drm_connector *scon) -{ - backlight_device_unregister(scon->backlight); -} diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_backlight.h b/drivers/gpu/drm/renesas/shmobile/shmob_drm_backlight.h deleted file mode 100644 index d9abb7a60be5..000000000000 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_backlight.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * shmob_drm_backlight.h -- SH Mobile DRM Backlight - * - * Copyright (C) 2012 Renesas Electronics Corporation - * - * Laurent Pinchart (laurent.pinchart@ideasonboard.com) - */ - -#ifndef __SHMOB_DRM_BACKLIGHT_H__ -#define __SHMOB_DRM_BACKLIGHT_H__ - -struct shmob_drm_connector; - -void shmob_drm_backlight_dpms(struct shmob_drm_connector *scon, int mode); -int shmob_drm_backlight_init(struct shmob_drm_connector *scon); -void shmob_drm_backlight_exit(struct shmob_drm_connector *scon); - -#endif /* __SHMOB_DRM_BACKLIGHT_H__ */ diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c b/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c index 11dd2bc803e7..2e2f37b9d0a4 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c @@ -7,9 +7,18 @@ * Laurent Pinchart (laurent.pinchart@ideasonboard.com) */ -#include <linux/backlight.h> #include <linux/clk.h> - +#include <linux/media-bus-format.h> +#include <linux/of.h> +#include <linux/of_graph.h> +#include <linux/pm_runtime.h> + +#include <drm/drm_atomic.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_atomic_state_helper.h> +#include <drm/drm_atomic_uapi.h> +#include <drm/drm_bridge.h> +#include <drm/drm_bridge_connector.h> #include <drm/drm_crtc.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_fb_dma_helper.h> @@ -18,85 +27,123 @@ #include <drm/drm_gem_dma_helper.h> #include <drm/drm_modeset_helper.h> #include <drm/drm_modeset_helper_vtables.h> -#include <drm/drm_plane_helper.h> +#include <drm/drm_panel.h> #include <drm/drm_probe_helper.h> #include <drm/drm_simple_kms_helper.h> #include <drm/drm_vblank.h> -#include "shmob_drm_backlight.h" +#include <video/videomode.h> + #include "shmob_drm_crtc.h" #include "shmob_drm_drv.h" #include "shmob_drm_kms.h" #include "shmob_drm_plane.h" #include "shmob_drm_regs.h" -/* - * TODO: panel support - */ - /* ----------------------------------------------------------------------------- - * Clock management + * Page Flip */ -static int shmob_drm_clk_on(struct shmob_drm_device *sdev) +void shmob_drm_crtc_finish_page_flip(struct shmob_drm_crtc 
*scrtc) { - int ret; + struct drm_pending_vblank_event *event; + struct drm_device *dev = scrtc->base.dev; + unsigned long flags; - if (sdev->clock) { - ret = clk_prepare_enable(sdev->clock); - if (ret < 0) - return ret; + spin_lock_irqsave(&dev->event_lock, flags); + event = scrtc->event; + scrtc->event = NULL; + if (event) { + drm_crtc_send_vblank_event(&scrtc->base, event); + wake_up(&scrtc->flip_wait); + drm_crtc_vblank_put(&scrtc->base); } + spin_unlock_irqrestore(&dev->event_lock, flags); +} - return 0; +static bool shmob_drm_crtc_page_flip_pending(struct shmob_drm_crtc *scrtc) +{ + struct drm_device *dev = scrtc->base.dev; + unsigned long flags; + bool pending; + + spin_lock_irqsave(&dev->event_lock, flags); + pending = scrtc->event != NULL; + spin_unlock_irqrestore(&dev->event_lock, flags); + + return pending; } -static void shmob_drm_clk_off(struct shmob_drm_device *sdev) +static void shmob_drm_crtc_wait_page_flip(struct shmob_drm_crtc *scrtc) { - if (sdev->clock) - clk_disable_unprepare(sdev->clock); + struct drm_crtc *crtc = &scrtc->base; + struct shmob_drm_device *sdev = to_shmob_device(crtc->dev); + + if (wait_event_timeout(scrtc->flip_wait, + !shmob_drm_crtc_page_flip_pending(scrtc), + msecs_to_jiffies(50))) + return; + + dev_warn(sdev->dev, "page flip timeout\n"); + + shmob_drm_crtc_finish_page_flip(scrtc); } /* ----------------------------------------------------------------------------- * CRTC */ +static const struct { + u32 fmt; + u32 ldmt1r; +} shmob_drm_bus_fmts[] = { + { MEDIA_BUS_FMT_RGB888_3X8, LDMT1R_MIFTYP_RGB8 }, + { MEDIA_BUS_FMT_RGB666_2X9_BE, LDMT1R_MIFTYP_RGB9 }, + { MEDIA_BUS_FMT_RGB888_2X12_BE, LDMT1R_MIFTYP_RGB12A }, + { MEDIA_BUS_FMT_RGB444_1X12, LDMT1R_MIFTYP_RGB12B }, + { MEDIA_BUS_FMT_RGB565_1X16, LDMT1R_MIFTYP_RGB16 }, + { MEDIA_BUS_FMT_RGB666_1X18, LDMT1R_MIFTYP_RGB18 }, + { MEDIA_BUS_FMT_RGB888_1X24, LDMT1R_MIFTYP_RGB24 }, + { MEDIA_BUS_FMT_UYVY8_1X16, LDMT1R_MIFTYP_YCBCR }, +}; + static void shmob_drm_crtc_setup_geometry(struct shmob_drm_crtc *scrtc) { - struct drm_crtc *crtc = &scrtc->crtc; - struct shmob_drm_device *sdev = crtc->dev->dev_private; - const struct shmob_drm_interface_data *idata = &sdev->pdata->iface; + struct drm_crtc *crtc = &scrtc->base; + struct shmob_drm_device *sdev = to_shmob_device(crtc->dev); + const struct drm_display_info *info = &sdev->connector->display_info; const struct drm_display_mode *mode = &crtc->mode; + unsigned int i; u32 value; - value = sdev->ldmt1r - | ((mode->flags & DRM_MODE_FLAG_PVSYNC) ? 0 : LDMT1R_VPOL) - | ((mode->flags & DRM_MODE_FLAG_PHSYNC) ? 0 : LDMT1R_HPOL) - | ((idata->flags & SHMOB_DRM_IFACE_FL_DWPOL) ? LDMT1R_DWPOL : 0) - | ((idata->flags & SHMOB_DRM_IFACE_FL_DIPOL) ? LDMT1R_DIPOL : 0) - | ((idata->flags & SHMOB_DRM_IFACE_FL_DAPOL) ? LDMT1R_DAPOL : 0) - | ((idata->flags & SHMOB_DRM_IFACE_FL_HSCNT) ? LDMT1R_HSCNT : 0) - | ((idata->flags & SHMOB_DRM_IFACE_FL_DWCNT) ? LDMT1R_DWCNT : 0); - lcdc_write(sdev, LDMT1R, value); - - if (idata->interface >= SHMOB_DRM_IFACE_SYS8A && - idata->interface <= SHMOB_DRM_IFACE_SYS24) { - /* Setup SYS bus. */ - value = (idata->sys.cs_setup << LDMT2R_CSUP_SHIFT) - | (idata->sys.vsync_active_high ? LDMT2R_RSV : 0) - | (idata->sys.vsync_dir_input ? 
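The flip machinery added here is the classic waitqueue pairing: the vblank interrupt path (finish_page_flip) clears scrtc->event and wakes flip_wait, while the teardown path sleeps until no flip is pending, with a 50 ms timeout after which the flip is force-completed. A minimal sketch of the consumer side of that pattern:

#include <linux/wait.h>
#include <linux/jiffies.h>

/* Wait briefly for the IRQ side to complete the flip; on timeout, the
 * caller force-completes it so userspace never waits forever on the
 * event (as shmob does via shmob_drm_crtc_finish_page_flip()). */
static void wait_for_flip(wait_queue_head_t *wq, bool *pending)
{
	if (wait_event_timeout(*wq, !*pending, msecs_to_jiffies(50)))
		return;		/* completed in time */
	/* timeout: fall through and let the caller force completion */
}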
LDMT2R_VSEL : 0) - | (idata->sys.write_setup << LDMT2R_WCSC_SHIFT) - | (idata->sys.write_cycle << LDMT2R_WCEC_SHIFT) - | (idata->sys.write_strobe << LDMT2R_WCLW_SHIFT); - lcdc_write(sdev, LDMT2R, value); - - value = (idata->sys.read_latch << LDMT3R_RDLC_SHIFT) - | (idata->sys.read_setup << LDMT3R_RCSC_SHIFT) - | (idata->sys.read_cycle << LDMT3R_RCEC_SHIFT) - | (idata->sys.read_strobe << LDMT3R_RCLW_SHIFT); - lcdc_write(sdev, LDMT3R, value); + if (!info->num_bus_formats || !info->bus_formats) { + dev_warn(sdev->dev, "No bus format reported, using RGB888\n"); + value = LDMT1R_MIFTYP_RGB24; + } else { + for (i = 0; i < ARRAY_SIZE(shmob_drm_bus_fmts); i++) { + if (shmob_drm_bus_fmts[i].fmt == info->bus_formats[0]) + break; + } + if (i < ARRAY_SIZE(shmob_drm_bus_fmts)) { + value = shmob_drm_bus_fmts[i].ldmt1r; + } else { + dev_warn(sdev->dev, + "unsupported bus format 0x%x, using RGB888\n", + info->bus_formats[0]); + value = LDMT1R_MIFTYP_RGB24; + } } + if (info->bus_flags & DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE) + value |= LDMT1R_DWPOL; + if (info->bus_flags & DRM_BUS_FLAG_DE_LOW) + value |= LDMT1R_DIPOL; + if (mode->flags & DRM_MODE_FLAG_NVSYNC) + value |= LDMT1R_VPOL; + if (mode->flags & DRM_MODE_FLAG_NHSYNC) + value |= LDMT1R_HPOL; + lcdc_write(sdev, LDMT1R, value); + value = ((mode->hdisplay / 8) << 16) /* HDCN */ | (mode->htotal / 8); /* HTCN */ lcdc_write(sdev, LDHCNR, value); @@ -121,7 +168,7 @@ static void shmob_drm_crtc_setup_geometry(struct shmob_drm_crtc *scrtc) static void shmob_drm_crtc_start_stop(struct shmob_drm_crtc *scrtc, bool start) { - struct shmob_drm_device *sdev = scrtc->crtc.dev->dev_private; + struct shmob_drm_device *sdev = to_shmob_device(scrtc->base.dev); u32 value; value = lcdc_read(sdev, LDCNT2R); @@ -145,34 +192,23 @@ static void shmob_drm_crtc_start_stop(struct shmob_drm_crtc *scrtc, bool start) } } -/* - * shmob_drm_crtc_start - Configure and start the LCDC - * @scrtc: the SH Mobile CRTC - * - * Configure and start the LCDC device. External devices (clocks, MERAM, panels, - * ...) are not touched by this function. - */ -static void shmob_drm_crtc_start(struct shmob_drm_crtc *scrtc) +static inline struct shmob_drm_crtc *to_shmob_crtc(struct drm_crtc *crtc) +{ + return container_of(crtc, struct shmob_drm_crtc, base); +} + +static void shmob_drm_crtc_atomic_enable(struct drm_crtc *crtc, + struct drm_atomic_state *state) { - struct drm_crtc *crtc = &scrtc->crtc; - struct shmob_drm_device *sdev = crtc->dev->dev_private; - const struct shmob_drm_interface_data *idata = &sdev->pdata->iface; - const struct shmob_drm_format_info *format; - struct drm_device *dev = sdev->ddev; - struct drm_plane *plane; + struct shmob_drm_crtc *scrtc = to_shmob_crtc(crtc); + struct shmob_drm_device *sdev = to_shmob_device(crtc->dev); + unsigned int clk_div = sdev->config.clk_div; + struct device *dev = sdev->dev; u32 value; int ret; - if (scrtc->started) - return; - - format = shmob_drm_format_info(crtc->primary->fb->format->format); - if (WARN_ON(format == NULL)) - return; - - /* Enable clocks before accessing the hardware. */ - ret = shmob_drm_clk_on(sdev); - if (ret < 0) + ret = pm_runtime_resume_and_get(dev); + if (ret) return; /* Reset and enable the LCDC. */ @@ -188,79 +224,50 @@ static void shmob_drm_crtc_start(struct shmob_drm_crtc *scrtc) lcdc_write(sdev, LDPMR, 0); value = sdev->lddckr; - if (idata->clk_div) { + if (clk_div) { /* FIXME: sh7724 can only use 42, 48, 54 and 60 for the divider * denominator. 
*/ lcdc_write(sdev, LDDCKPAT1R, 0); - lcdc_write(sdev, LDDCKPAT2R, (1 << (idata->clk_div / 2)) - 1); + lcdc_write(sdev, LDDCKPAT2R, (1 << (clk_div / 2)) - 1); - if (idata->clk_div == 1) + if (clk_div == 1) value |= LDDCKR_MOSEL; else - value |= idata->clk_div; + value |= clk_div; } lcdc_write(sdev, LDDCKR, value); lcdc_write(sdev, LDDCKSTPR, 0); lcdc_wait_bit(sdev, LDDCKSTPR, ~0, 0); - /* TODO: Setup SYS panel */ - /* Setup geometry, format, frame buffer memory and operation mode. */ shmob_drm_crtc_setup_geometry(scrtc); - /* TODO: Handle YUV colorspaces. Hardcode REC709 for now. */ - lcdc_write(sdev, LDDFR, format->lddfr | LDDFR_CF1); - lcdc_write(sdev, LDMLSR, scrtc->line_size); - lcdc_write(sdev, LDSA1R, scrtc->dma[0]); - if (format->yuv) - lcdc_write(sdev, LDSA2R, scrtc->dma[1]); lcdc_write(sdev, LDSM1R, 0); - /* Word and long word swap. */ - switch (format->fourcc) { - case DRM_FORMAT_RGB565: - case DRM_FORMAT_NV21: - case DRM_FORMAT_NV61: - case DRM_FORMAT_NV42: - value = LDDDSR_LS | LDDDSR_WS; - break; - case DRM_FORMAT_RGB888: - case DRM_FORMAT_NV12: - case DRM_FORMAT_NV16: - case DRM_FORMAT_NV24: - value = LDDDSR_LS | LDDDSR_WS | LDDDSR_BS; - break; - case DRM_FORMAT_ARGB8888: - case DRM_FORMAT_XRGB8888: - default: - value = LDDDSR_LS; - break; - } - lcdc_write(sdev, LDDDSR, value); - - /* Setup planes. */ - drm_for_each_legacy_plane(plane, dev) { - if (plane->crtc == crtc) - shmob_drm_plane_setup(plane); - } - /* Enable the display output. */ lcdc_write(sdev, LDCNT1R, LDCNT1R_DE); shmob_drm_crtc_start_stop(scrtc, true); - scrtc->started = true; + /* Turn vertical blank interrupt reporting back on. */ + drm_crtc_vblank_on(crtc); } -static void shmob_drm_crtc_stop(struct shmob_drm_crtc *scrtc) +static void shmob_drm_crtc_atomic_disable(struct drm_crtc *crtc, + struct drm_atomic_state *state) { - struct drm_crtc *crtc = &scrtc->crtc; - struct shmob_drm_device *sdev = crtc->dev->dev_private; + struct shmob_drm_crtc *scrtc = to_shmob_crtc(crtc); + struct shmob_drm_device *sdev = to_shmob_device(crtc->dev); - if (!scrtc->started) - return; + /* + * Disable vertical blank interrupt reporting. We first need to wait + * for page flip completion before stopping the CRTC as userspace + * expects page flips to eventually complete. + */ + shmob_drm_crtc_wait_page_flip(scrtc); + drm_crtc_vblank_off(crtc); /* Stop the LCDC. */ shmob_drm_crtc_start_stop(scrtc, false); @@ -268,145 +275,31 @@ static void shmob_drm_crtc_stop(struct shmob_drm_crtc *scrtc) /* Disable the display output. */ lcdc_write(sdev, LDCNT1R, 0); - /* Stop clocks. */ - shmob_drm_clk_off(sdev); - - scrtc->started = false; -} - -void shmob_drm_crtc_suspend(struct shmob_drm_crtc *scrtc) -{ - shmob_drm_crtc_stop(scrtc); -} - -void shmob_drm_crtc_resume(struct shmob_drm_crtc *scrtc) -{ - if (scrtc->dpms != DRM_MODE_DPMS_ON) - return; - - shmob_drm_crtc_start(scrtc); + pm_runtime_put(sdev->dev); } -static void shmob_drm_crtc_compute_base(struct shmob_drm_crtc *scrtc, - int x, int y) +static void shmob_drm_crtc_atomic_flush(struct drm_crtc *crtc, + struct drm_atomic_state *state) { - struct drm_crtc *crtc = &scrtc->crtc; - struct drm_framebuffer *fb = crtc->primary->fb; - struct drm_gem_dma_object *gem; - unsigned int bpp; - - bpp = scrtc->format->yuv ? 
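The dot-clock divider is programmed as a bit pattern: (1 << (clk_div / 2)) - 1 sets the low clk_div/2 bits of LDDCKPAT2R. The wider pattern semantics are LCDC hardware detail; the arithmetic itself is just a mask builder:

#include <stdint.h>
#include <stdio.h>

static uint32_t lddckpat2r_pattern(unsigned int clk_div)
{
	return (1u << (clk_div / 2)) - 1;	/* clk_div/2 low bits set */
}

int main(void)
{
	printf("0x%x\n", lddckpat2r_pattern(8));	/* -> 0xf */
	return 0;
}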
8 : scrtc->format->bpp; - gem = drm_fb_dma_get_gem_obj(fb, 0); - scrtc->dma[0] = gem->dma_addr + fb->offsets[0] - + y * fb->pitches[0] + x * bpp / 8; - - if (scrtc->format->yuv) { - bpp = scrtc->format->bpp - 8; - gem = drm_fb_dma_get_gem_obj(fb, 1); - scrtc->dma[1] = gem->dma_addr + fb->offsets[1] - + y / (bpp == 4 ? 2 : 1) * fb->pitches[1] - + x * (bpp == 16 ? 2 : 1); - } -} - -static void shmob_drm_crtc_update_base(struct shmob_drm_crtc *scrtc) -{ - struct drm_crtc *crtc = &scrtc->crtc; - struct shmob_drm_device *sdev = crtc->dev->dev_private; - - shmob_drm_crtc_compute_base(scrtc, crtc->x, crtc->y); - - lcdc_write_mirror(sdev, LDSA1R, scrtc->dma[0]); - if (scrtc->format->yuv) - lcdc_write_mirror(sdev, LDSA2R, scrtc->dma[1]); - - lcdc_write(sdev, LDRCNTR, lcdc_read(sdev, LDRCNTR) ^ LDRCNTR_MRS); -} - -#define to_shmob_crtc(c) container_of(c, struct shmob_drm_crtc, crtc) - -static void shmob_drm_crtc_dpms(struct drm_crtc *crtc, int mode) -{ - struct shmob_drm_crtc *scrtc = to_shmob_crtc(crtc); - - if (scrtc->dpms == mode) - return; - - if (mode == DRM_MODE_DPMS_ON) - shmob_drm_crtc_start(scrtc); - else - shmob_drm_crtc_stop(scrtc); - - scrtc->dpms = mode; -} - -static void shmob_drm_crtc_mode_prepare(struct drm_crtc *crtc) -{ - shmob_drm_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); -} + struct drm_pending_vblank_event *event; + struct drm_device *dev = crtc->dev; + unsigned long flags; -static int shmob_drm_crtc_mode_set(struct drm_crtc *crtc, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode, - int x, int y, - struct drm_framebuffer *old_fb) -{ - struct shmob_drm_crtc *scrtc = to_shmob_crtc(crtc); - struct shmob_drm_device *sdev = crtc->dev->dev_private; - const struct shmob_drm_format_info *format; - - format = shmob_drm_format_info(crtc->primary->fb->format->format); - if (format == NULL) { - dev_dbg(sdev->dev, "mode_set: unsupported format %p4cc\n", - &crtc->primary->fb->format->format); - return -EINVAL; + if (crtc->state->event) { + spin_lock_irqsave(&dev->event_lock, flags); + event = crtc->state->event; + crtc->state->event = NULL; + drm_crtc_send_vblank_event(crtc, event); + spin_unlock_irqrestore(&dev->event_lock, flags); } - - scrtc->format = format; - scrtc->line_size = crtc->primary->fb->pitches[0]; - - shmob_drm_crtc_compute_base(scrtc, x, y); - - return 0; -} - -static void shmob_drm_crtc_mode_commit(struct drm_crtc *crtc) -{ - shmob_drm_crtc_dpms(crtc, DRM_MODE_DPMS_ON); -} - -static int shmob_drm_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y, - struct drm_framebuffer *old_fb) -{ - shmob_drm_crtc_update_base(to_shmob_crtc(crtc)); - - return 0; } static const struct drm_crtc_helper_funcs crtc_helper_funcs = { - .dpms = shmob_drm_crtc_dpms, - .prepare = shmob_drm_crtc_mode_prepare, - .commit = shmob_drm_crtc_mode_commit, - .mode_set = shmob_drm_crtc_mode_set, - .mode_set_base = shmob_drm_crtc_mode_set_base, + .atomic_flush = shmob_drm_crtc_atomic_flush, + .atomic_enable = shmob_drm_crtc_atomic_enable, + .atomic_disable = shmob_drm_crtc_atomic_disable, }; -void shmob_drm_crtc_finish_page_flip(struct shmob_drm_crtc *scrtc) -{ - struct drm_pending_vblank_event *event; - struct drm_device *dev = scrtc->crtc.dev; - unsigned long flags; - - spin_lock_irqsave(&dev->event_lock, flags); - event = scrtc->event; - scrtc->event = NULL; - if (event) { - drm_crtc_send_vblank_event(&scrtc->crtc, event); - drm_crtc_vblank_put(&scrtc->crtc); - } - spin_unlock_irqrestore(&dev->event_lock, flags); -} - static int shmob_drm_crtc_page_flip(struct drm_crtc *crtc, struct 
drm_framebuffer *fb, struct drm_pending_vblank_event *event, @@ -414,7 +307,7 @@ static int shmob_drm_crtc_page_flip(struct drm_crtc *crtc, struct drm_modeset_acquire_ctx *ctx) { struct shmob_drm_crtc *scrtc = to_shmob_crtc(crtc); - struct drm_device *dev = scrtc->crtc.dev; + struct drm_device *dev = scrtc->base.dev; unsigned long flags; spin_lock_irqsave(&dev->event_lock, flags); @@ -424,12 +317,11 @@ static int shmob_drm_crtc_page_flip(struct drm_crtc *crtc, } spin_unlock_irqrestore(&dev->event_lock, flags); - crtc->primary->fb = fb; - shmob_drm_crtc_update_base(scrtc); + drm_atomic_set_fb_for_plane(crtc->primary->state, fb); if (event) { event->pipe = 0; - drm_crtc_vblank_get(&scrtc->crtc); + drm_crtc_vblank_get(&scrtc->base); spin_lock_irqsave(&dev->event_lock, flags); scrtc->event = event; spin_unlock_irqrestore(&dev->event_lock, flags); @@ -457,7 +349,7 @@ static void shmob_drm_crtc_enable_vblank(struct shmob_drm_device *sdev, static int shmob_drm_enable_vblank(struct drm_crtc *crtc) { - struct shmob_drm_device *sdev = crtc->dev->dev_private; + struct shmob_drm_device *sdev = to_shmob_device(crtc->dev); shmob_drm_crtc_enable_vblank(sdev, true); @@ -466,88 +358,65 @@ static int shmob_drm_enable_vblank(struct drm_crtc *crtc) static void shmob_drm_disable_vblank(struct drm_crtc *crtc) { - struct shmob_drm_device *sdev = crtc->dev->dev_private; + struct shmob_drm_device *sdev = to_shmob_device(crtc->dev); shmob_drm_crtc_enable_vblank(sdev, false); } static const struct drm_crtc_funcs crtc_funcs = { + .reset = drm_atomic_helper_crtc_reset, .destroy = drm_crtc_cleanup, - .set_config = drm_crtc_helper_set_config, + .set_config = drm_atomic_helper_set_config, .page_flip = shmob_drm_crtc_page_flip, + .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, .enable_vblank = shmob_drm_enable_vblank, .disable_vblank = shmob_drm_disable_vblank, }; -static const uint32_t modeset_formats[] = { - DRM_FORMAT_RGB565, - DRM_FORMAT_RGB888, - DRM_FORMAT_ARGB8888, - DRM_FORMAT_XRGB8888, -}; - -static const struct drm_plane_funcs primary_plane_funcs = { - DRM_PLANE_NON_ATOMIC_FUNCS, -}; - int shmob_drm_crtc_create(struct shmob_drm_device *sdev) { - struct drm_crtc *crtc = &sdev->crtc.crtc; - struct drm_plane *primary; + struct drm_crtc *crtc = &sdev->crtc.base; + struct drm_plane *primary, *plane; + unsigned int i; int ret; - sdev->crtc.dpms = DRM_MODE_DPMS_OFF; + init_waitqueue_head(&sdev->crtc.flip_wait); - primary = __drm_universal_plane_alloc(sdev->ddev, sizeof(*primary), 0, - 0, &primary_plane_funcs, - modeset_formats, - ARRAY_SIZE(modeset_formats), - NULL, DRM_PLANE_TYPE_PRIMARY, - NULL); + primary = shmob_drm_plane_create(sdev, DRM_PLANE_TYPE_PRIMARY, 0); if (IS_ERR(primary)) return PTR_ERR(primary); - ret = drm_crtc_init_with_planes(sdev->ddev, crtc, primary, NULL, + for (i = 1; i < 5; ++i) { + plane = shmob_drm_plane_create(sdev, DRM_PLANE_TYPE_OVERLAY, i); + if (IS_ERR(plane)) + return PTR_ERR(plane); + } + + ret = drm_crtc_init_with_planes(&sdev->ddev, crtc, primary, NULL, &crtc_funcs, NULL); - if (ret < 0) { - drm_plane_cleanup(primary); - kfree(primary); + if (ret < 0) return ret; - } drm_crtc_helper_add(crtc, &crtc_helper_funcs); + /* Start with vertical blank interrupt reporting disabled. 
*/ + drm_crtc_vblank_off(crtc); + return 0; } /* ----------------------------------------------------------------------------- - * Encoder + * Legacy Encoder */ -#define to_shmob_encoder(e) \ - container_of(e, struct shmob_drm_encoder, encoder) - -static void shmob_drm_encoder_dpms(struct drm_encoder *encoder, int mode) -{ - struct shmob_drm_encoder *senc = to_shmob_encoder(encoder); - struct shmob_drm_device *sdev = encoder->dev->dev_private; - struct shmob_drm_connector *scon = &sdev->connector; - - if (senc->dpms == mode) - return; - - shmob_drm_backlight_dpms(scon, mode); - - senc->dpms = mode; -} - static bool shmob_drm_encoder_mode_fixup(struct drm_encoder *encoder, const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { struct drm_device *dev = encoder->dev; - struct shmob_drm_device *sdev = dev->dev_private; - struct drm_connector *connector = &sdev->connector.connector; + struct shmob_drm_device *sdev = to_shmob_device(dev); + struct drm_connector *connector = sdev->connector; const struct drm_display_mode *panel_mode; if (list_empty(&connector->modes)) { @@ -563,60 +432,61 @@ static bool shmob_drm_encoder_mode_fixup(struct drm_encoder *encoder, return true; } -static void shmob_drm_encoder_mode_prepare(struct drm_encoder *encoder) -{ - /* No-op, everything is handled in the CRTC code. */ -} - -static void shmob_drm_encoder_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - /* No-op, everything is handled in the CRTC code. */ -} - -static void shmob_drm_encoder_mode_commit(struct drm_encoder *encoder) -{ - /* No-op, everything is handled in the CRTC code. */ -} - static const struct drm_encoder_helper_funcs encoder_helper_funcs = { - .dpms = shmob_drm_encoder_dpms, .mode_fixup = shmob_drm_encoder_mode_fixup, - .prepare = shmob_drm_encoder_mode_prepare, - .commit = shmob_drm_encoder_mode_commit, - .mode_set = shmob_drm_encoder_mode_set, }; +/* ----------------------------------------------------------------------------- + * Encoder + */ + int shmob_drm_encoder_create(struct shmob_drm_device *sdev) { - struct drm_encoder *encoder = &sdev->encoder.encoder; + struct drm_encoder *encoder = &sdev->encoder; + struct drm_bridge *bridge; int ret; - sdev->encoder.dpms = DRM_MODE_DPMS_OFF; - encoder->possible_crtcs = 1; - ret = drm_simple_encoder_init(sdev->ddev, encoder, - DRM_MODE_ENCODER_LVDS); + ret = drm_simple_encoder_init(&sdev->ddev, encoder, + DRM_MODE_ENCODER_DPI); if (ret < 0) return ret; - drm_encoder_helper_add(encoder, &encoder_helper_funcs); + if (sdev->pdata) { + drm_encoder_helper_add(encoder, &encoder_helper_funcs); + return 0; + } + + /* Create a panel bridge */ + bridge = devm_drm_of_get_bridge(sdev->dev, sdev->dev->of_node, 0, 0); + if (IS_ERR(bridge)) + return PTR_ERR(bridge); + + /* Attach the bridge to the encoder */ + ret = drm_bridge_attach(encoder, bridge, NULL, + DRM_BRIDGE_ATTACH_NO_CONNECTOR); + if (ret) { + dev_err(sdev->dev, "failed to attach bridge: %pe\n", + ERR_PTR(ret)); + return ret; + } return 0; } /* ----------------------------------------------------------------------------- - * Connector + * Legacy Connector */ -#define to_shmob_connector(c) \ - container_of(c, struct shmob_drm_connector, connector) +static inline struct shmob_drm_connector *to_shmob_connector(struct drm_connector *connector) +{ + return container_of(connector, struct shmob_drm_connector, base); +} static int shmob_drm_connector_get_modes(struct drm_connector *connector) { - struct 
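The DT path pairs DRM_BRIDGE_ATTACH_NO_CONNECTOR with drm_bridge_connector_init(): the encoder attaches the panel bridge without creating a connector, and shmob_drm_connector_create() later synthesizes the connector from the bridge chain. A condensed sketch of the two halves wired together (error handling trimmed):

/* Sketch only: encoder/bridge wiring on the DT path. The connector is
 * created from the bridge chain rather than by the bridge itself. */
static int wire_dt_output(struct drm_device *drm,
			  struct drm_encoder *encoder,
			  struct drm_bridge *bridge)
{
	struct drm_connector *connector;
	int ret;

	ret = drm_bridge_attach(encoder, bridge, NULL,
				DRM_BRIDGE_ATTACH_NO_CONNECTOR);
	if (ret)
		return ret;

	connector = drm_bridge_connector_init(drm, encoder);
	if (IS_ERR(connector))
		return PTR_ERR(connector);

	return drm_connector_attach_encoder(connector, encoder);
}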
shmob_drm_device *sdev = connector->dev->dev_private; + struct shmob_drm_connector *scon = to_shmob_connector(connector); struct drm_display_mode *mode; mode = drm_mode_create(connector->dev); @@ -624,22 +494,10 @@ static int shmob_drm_connector_get_modes(struct drm_connector *connector) return 0; mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; - mode->clock = sdev->pdata->panel.mode.clock; - mode->hdisplay = sdev->pdata->panel.mode.hdisplay; - mode->hsync_start = sdev->pdata->panel.mode.hsync_start; - mode->hsync_end = sdev->pdata->panel.mode.hsync_end; - mode->htotal = sdev->pdata->panel.mode.htotal; - mode->vdisplay = sdev->pdata->panel.mode.vdisplay; - mode->vsync_start = sdev->pdata->panel.mode.vsync_start; - mode->vsync_end = sdev->pdata->panel.mode.vsync_end; - mode->vtotal = sdev->pdata->panel.mode.vtotal; - mode->flags = sdev->pdata->panel.mode.flags; - - drm_mode_set_name(mode); - drm_mode_probed_add(connector, mode); - connector->display_info.width_mm = sdev->pdata->panel.width_mm; - connector->display_info.height_mm = sdev->pdata->panel.height_mm; + drm_display_mode_from_videomode(scon->mode, mode); + + drm_mode_probed_add(connector, mode); return 1; } @@ -659,54 +517,106 @@ static const struct drm_connector_helper_funcs connector_helper_funcs = { static void shmob_drm_connector_destroy(struct drm_connector *connector) { - struct shmob_drm_connector *scon = to_shmob_connector(connector); - - shmob_drm_backlight_exit(scon); drm_connector_unregister(connector); drm_connector_cleanup(connector); + + kfree(connector); } static const struct drm_connector_funcs connector_funcs = { - .dpms = drm_helper_connector_dpms, + .reset = drm_atomic_helper_connector_reset, .fill_modes = drm_helper_probe_single_connector_modes, .destroy = shmob_drm_connector_destroy, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; -int shmob_drm_connector_create(struct shmob_drm_device *sdev, - struct drm_encoder *encoder) +static struct drm_connector * +shmob_drm_connector_init(struct shmob_drm_device *sdev, + struct drm_encoder *encoder) { - struct drm_connector *connector = &sdev->connector.connector; + u32 bus_fmt = sdev->pdata->iface.bus_fmt; + struct shmob_drm_connector *scon; + struct drm_connector *connector; + struct drm_display_info *info; + unsigned int i; int ret; - sdev->connector.encoder = encoder; + for (i = 0; i < ARRAY_SIZE(shmob_drm_bus_fmts); i++) { + if (shmob_drm_bus_fmts[i].fmt == bus_fmt) + break; + } + if (i == ARRAY_SIZE(shmob_drm_bus_fmts)) { + dev_err(sdev->dev, "unsupported bus format 0x%x\n", bus_fmt); + return ERR_PTR(-EINVAL); + } - connector->display_info.width_mm = sdev->pdata->panel.width_mm; - connector->display_info.height_mm = sdev->pdata->panel.height_mm; + scon = kzalloc(sizeof(*scon), GFP_KERNEL); + if (!scon) + return ERR_PTR(-ENOMEM); - ret = drm_connector_init(sdev->ddev, connector, &connector_funcs, - DRM_MODE_CONNECTOR_LVDS); - if (ret < 0) - return ret; + connector = &scon->base; + scon->encoder = encoder; + scon->mode = &sdev->pdata->panel.mode; + + info = &connector->display_info; + info->width_mm = sdev->pdata->panel.width_mm; + info->height_mm = sdev->pdata->panel.height_mm; + + if (scon->mode->flags & DISPLAY_FLAGS_PIXDATA_POSEDGE) + info->bus_flags |= DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE; + if (scon->mode->flags & DISPLAY_FLAGS_DE_LOW) + info->bus_flags |= DRM_BUS_FLAG_DE_LOW; + + ret = drm_display_info_set_bus_formats(info, &bus_fmt, 1); + if (ret < 0) { 
+ kfree(scon); + return ERR_PTR(ret); + } + + ret = drm_connector_init(&sdev->ddev, connector, &connector_funcs, + DRM_MODE_CONNECTOR_DPI); + if (ret < 0) { + kfree(scon); + return ERR_PTR(ret); + } drm_connector_helper_add(connector, &connector_helper_funcs); - ret = shmob_drm_backlight_init(&sdev->connector); - if (ret < 0) - goto err_cleanup; + return connector; +} + +/* ----------------------------------------------------------------------------- + * Connector + */ + +int shmob_drm_connector_create(struct shmob_drm_device *sdev, + struct drm_encoder *encoder) +{ + struct drm_connector *connector; + int ret; + + if (sdev->pdata) + connector = shmob_drm_connector_init(sdev, encoder); + else + connector = drm_bridge_connector_init(&sdev->ddev, encoder); + if (IS_ERR(connector)) { + dev_err(sdev->dev, "failed to create connector: %pe\n", + connector); + return PTR_ERR(connector); + } ret = drm_connector_attach_encoder(connector, encoder); if (ret < 0) - goto err_backlight; + goto error; + + connector->dpms = DRM_MODE_DPMS_OFF; - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); - drm_object_property_set_value(&connector->base, - sdev->ddev->mode_config.dpms_property, DRM_MODE_DPMS_OFF); + sdev->connector = connector; return 0; -err_backlight: - shmob_drm_backlight_exit(&sdev->connector); -err_cleanup: +error: drm_connector_cleanup(connector); return ret; } diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.h b/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.h index 21718843f46d..16e1712dd04e 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.h +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.h @@ -14,39 +14,30 @@ #include <drm/drm_connector.h> #include <drm/drm_encoder.h> -struct backlight_device; +#include <linux/wait.h> + +#include <video/videomode.h> + struct drm_pending_vblank_event; struct shmob_drm_device; struct shmob_drm_format_info; struct shmob_drm_crtc { - struct drm_crtc crtc; + struct drm_crtc base; struct drm_pending_vblank_event *event; - int dpms; - - const struct shmob_drm_format_info *format; - unsigned long dma[2]; - unsigned int line_size; - bool started; -}; - -struct shmob_drm_encoder { - struct drm_encoder encoder; - int dpms; + wait_queue_head_t flip_wait; }; +/* Legacy connector */ struct shmob_drm_connector { - struct drm_connector connector; + struct drm_connector base; struct drm_encoder *encoder; - - struct backlight_device *backlight; + const struct videomode *mode; }; int shmob_drm_crtc_create(struct shmob_drm_device *sdev); void shmob_drm_crtc_finish_page_flip(struct shmob_drm_crtc *scrtc); -void shmob_drm_crtc_suspend(struct shmob_drm_crtc *scrtc); -void shmob_drm_crtc_resume(struct shmob_drm_crtc *scrtc); int shmob_drm_encoder_create(struct shmob_drm_device *sdev); int shmob_drm_connector_create(struct shmob_drm_device *sdev, diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c b/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c index e5db4e0095ba..e83c3e52251d 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c @@ -11,13 +11,17 @@ #include <linux/io.h> #include <linux/mm.h> #include <linux/module.h> +#include <linux/of.h> #include <linux/platform_device.h> #include <linux/pm.h> +#include <linux/pm_runtime.h> #include <linux/slab.h> +#include <drm/drm_atomic_helper.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_generic.h> #include <drm/drm_gem_dma_helper.h> +#include <drm/drm_modeset_helper.h> #include <drm/drm_module.h> #include
<drm/drm_probe_helper.h> #include <drm/drm_vblank.h> @@ -31,57 +35,23 @@ * Hardware initialization */ -static int shmob_drm_init_interface(struct shmob_drm_device *sdev) -{ - static const u32 ldmt1r[] = { - [SHMOB_DRM_IFACE_RGB8] = LDMT1R_MIFTYP_RGB8, - [SHMOB_DRM_IFACE_RGB9] = LDMT1R_MIFTYP_RGB9, - [SHMOB_DRM_IFACE_RGB12A] = LDMT1R_MIFTYP_RGB12A, - [SHMOB_DRM_IFACE_RGB12B] = LDMT1R_MIFTYP_RGB12B, - [SHMOB_DRM_IFACE_RGB16] = LDMT1R_MIFTYP_RGB16, - [SHMOB_DRM_IFACE_RGB18] = LDMT1R_MIFTYP_RGB18, - [SHMOB_DRM_IFACE_RGB24] = LDMT1R_MIFTYP_RGB24, - [SHMOB_DRM_IFACE_YUV422] = LDMT1R_MIFTYP_YCBCR, - [SHMOB_DRM_IFACE_SYS8A] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS8A, - [SHMOB_DRM_IFACE_SYS8B] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS8B, - [SHMOB_DRM_IFACE_SYS8C] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS8C, - [SHMOB_DRM_IFACE_SYS8D] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS8D, - [SHMOB_DRM_IFACE_SYS9] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS9, - [SHMOB_DRM_IFACE_SYS12] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS12, - [SHMOB_DRM_IFACE_SYS16A] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS16A, - [SHMOB_DRM_IFACE_SYS16B] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS16B, - [SHMOB_DRM_IFACE_SYS16C] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS16C, - [SHMOB_DRM_IFACE_SYS18] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS18, - [SHMOB_DRM_IFACE_SYS24] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS24, - }; - - if (sdev->pdata->iface.interface >= ARRAY_SIZE(ldmt1r)) { - dev_err(sdev->dev, "invalid interface type %u\n", - sdev->pdata->iface.interface); - return -EINVAL; - } - - sdev->ldmt1r = ldmt1r[sdev->pdata->iface.interface]; - return 0; -} - static int shmob_drm_setup_clocks(struct shmob_drm_device *sdev, - enum shmob_drm_clk_source clksrc) + enum shmob_drm_clk_source clksrc) { struct clk *clk; char *clkname; switch (clksrc) { case SHMOB_DRM_CLK_BUS: - clkname = "bus_clk"; + clkname = "fck"; sdev->lddckr = LDDCKR_ICKSEL_BUS; break; case SHMOB_DRM_CLK_PERIPHERAL: - clkname = "peripheral_clk"; + clkname = "media"; sdev->lddckr = LDDCKR_ICKSEL_MIPI; break; case SHMOB_DRM_CLK_EXTERNAL: - clkname = NULL; + clkname = "lclk"; sdev->lddckr = LDDCKR_ICKSEL_HDMI; break; default: @@ -105,7 +75,7 @@ static int shmob_drm_setup_clocks(struct shmob_drm_device *sdev, static irqreturn_t shmob_drm_irq(int irq, void *arg) { struct drm_device *dev = arg; - struct shmob_drm_device *sdev = dev->dev_private; + struct shmob_drm_device *sdev = to_shmob_device(dev); unsigned long flags; u32 status; @@ -119,7 +89,7 @@ static irqreturn_t shmob_drm_irq(int irq, void *arg) spin_unlock_irqrestore(&sdev->irq_lock, flags); if (status & LDINTR_VES) { - drm_handle_vblank(dev, 0); + drm_crtc_handle_vblank(&sdev->crtc.base); shmob_drm_crtc_finish_page_flip(&sdev->crtc); } @@ -129,7 +99,7 @@ static irqreturn_t shmob_drm_irq(int irq, void *arg) DEFINE_DRM_GEM_DMA_FOPS(shmob_drm_fops); static const struct drm_driver shmob_drm_driver = { - .driver_features = DRIVER_GEM | DRIVER_MODESET, + .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, DRM_GEM_DMA_DRIVER_OPS, .fops = &shmob_drm_fops, .name = "shmob-drm", @@ -147,26 +117,45 @@ static int shmob_drm_pm_suspend(struct device *dev) { struct shmob_drm_device *sdev = dev_get_drvdata(dev); - drm_kms_helper_poll_disable(sdev->ddev); - shmob_drm_crtc_suspend(&sdev->crtc); + return drm_mode_config_helper_suspend(&sdev->ddev); +} + +static int shmob_drm_pm_resume(struct device *dev) +{ + struct shmob_drm_device *sdev = dev_get_drvdata(dev); + + return drm_mode_config_helper_resume(&sdev->ddev); +} + +static int shmob_drm_pm_runtime_suspend(struct device *dev) +{ + struct shmob_drm_device *sdev = 
dev_get_drvdata(dev); + + if (sdev->clock) + clk_disable_unprepare(sdev->clock); return 0; } -static int shmob_drm_pm_resume(struct device *dev) +static int shmob_drm_pm_runtime_resume(struct device *dev) { struct shmob_drm_device *sdev = dev_get_drvdata(dev); + int ret; - drm_modeset_lock_all(sdev->ddev); - shmob_drm_crtc_resume(&sdev->crtc); - drm_modeset_unlock_all(sdev->ddev); + if (sdev->clock) { + ret = clk_prepare_enable(sdev->clock); + if (ret < 0) + return ret; + } - drm_kms_helper_poll_enable(sdev->ddev); return 0; } -static DEFINE_SIMPLE_DEV_PM_OPS(shmob_drm_pm_ops, - shmob_drm_pm_suspend, shmob_drm_pm_resume); +static const struct dev_pm_ops shmob_drm_pm_ops = { + SYSTEM_SLEEP_PM_OPS(shmob_drm_pm_suspend, shmob_drm_pm_resume) + RUNTIME_PM_OPS(shmob_drm_pm_runtime_suspend, + shmob_drm_pm_runtime_resume, NULL) +}; /* ----------------------------------------------------------------------------- * Platform driver @@ -175,37 +164,45 @@ static DEFINE_SIMPLE_DEV_PM_OPS(shmob_drm_pm_ops, static void shmob_drm_remove(struct platform_device *pdev) { struct shmob_drm_device *sdev = platform_get_drvdata(pdev); - struct drm_device *ddev = sdev->ddev; + struct drm_device *ddev = &sdev->ddev; drm_dev_unregister(ddev); + drm_atomic_helper_shutdown(ddev); drm_kms_helper_poll_fini(ddev); - free_irq(sdev->irq, ddev); - drm_dev_put(ddev); } static int shmob_drm_probe(struct platform_device *pdev) { struct shmob_drm_platform_data *pdata = pdev->dev.platform_data; + const struct shmob_drm_config *config; struct shmob_drm_device *sdev; struct drm_device *ddev; - unsigned int i; int ret; - if (pdata == NULL) { + config = of_device_get_match_data(&pdev->dev); + if (!config && !pdata) { dev_err(&pdev->dev, "no platform data\n"); return -EINVAL; } /* - * Allocate and initialize the driver private data, I/O resources and - * clocks. + * Allocate and initialize the DRM device, driver private data, I/O + * resources and clocks. */ - sdev = devm_kzalloc(&pdev->dev, sizeof(*sdev), GFP_KERNEL); - if (sdev == NULL) - return -ENOMEM; + sdev = devm_drm_dev_alloc(&pdev->dev, &shmob_drm_driver, + struct shmob_drm_device, ddev); + if (IS_ERR(sdev)) + return PTR_ERR(sdev); + ddev = &sdev->ddev; sdev->dev = &pdev->dev; - sdev->pdata = pdata; + if (config) { + sdev->config = *config; + } else { + sdev->pdata = pdata; + sdev->config.clk_source = pdata->clk_source; + sdev->config.clk_div = pdata->iface.clk_div; + } spin_lock_init(&sdev->irq_lock); platform_set_drvdata(pdev, sdev); @@ -214,49 +211,32 @@ static int shmob_drm_probe(struct platform_device *pdev) if (IS_ERR(sdev->mmio)) return PTR_ERR(sdev->mmio); - ret = shmob_drm_setup_clocks(sdev, pdata->clk_source); + ret = shmob_drm_setup_clocks(sdev, sdev->config.clk_source); if (ret < 0) return ret; - ret = shmob_drm_init_interface(sdev); - if (ret < 0) + ret = devm_pm_runtime_enable(&pdev->dev); + if (ret) return ret; - /* Allocate and initialize the DRM device. 
*/ - ddev = drm_dev_alloc(&shmob_drm_driver, &pdev->dev); - if (IS_ERR(ddev)) - return PTR_ERR(ddev); - - sdev->ddev = ddev; - ddev->dev_private = sdev; - - ret = shmob_drm_modeset_init(sdev); - if (ret < 0) { - dev_err(&pdev->dev, "failed to initialize mode setting\n"); - goto err_free_drm_dev; - } - - for (i = 0; i < 4; ++i) { - ret = shmob_drm_plane_create(sdev, i); - if (ret < 0) { - dev_err(&pdev->dev, "failed to create plane %u\n", i); - goto err_modeset_cleanup; - } - } - ret = drm_vblank_init(ddev, 1); if (ret < 0) { dev_err(&pdev->dev, "failed to initialize vblank\n"); - goto err_modeset_cleanup; + return ret; } + ret = shmob_drm_modeset_init(sdev); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, + "failed to initialize mode setting\n"); + ret = platform_get_irq(pdev, 0); if (ret < 0) goto err_modeset_cleanup; sdev->irq = ret; - ret = request_irq(sdev->irq, shmob_drm_irq, 0, ddev->driver->name, - ddev); + ret = devm_request_irq(&pdev->dev, sdev->irq, shmob_drm_irq, 0, + ddev->driver->name, ddev); if (ret < 0) { dev_err(&pdev->dev, "failed to install IRQ handler\n"); goto err_modeset_cleanup; @@ -268,28 +248,35 @@ static int shmob_drm_probe(struct platform_device *pdev) */ ret = drm_dev_register(ddev, 0); if (ret < 0) - goto err_irq_uninstall; + goto err_modeset_cleanup; drm_fbdev_generic_setup(ddev, 16); return 0; -err_irq_uninstall: - free_irq(sdev->irq, ddev); err_modeset_cleanup: drm_kms_helper_poll_fini(ddev); -err_free_drm_dev: - drm_dev_put(ddev); - return ret; } +static const struct shmob_drm_config shmob_arm_config = { + .clk_source = SHMOB_DRM_CLK_BUS, + .clk_div = 5, +}; + +static const struct of_device_id shmob_drm_of_table[] __maybe_unused = { + { .compatible = "renesas,r8a7740-lcdc", .data = &shmob_arm_config, }, + { .compatible = "renesas,sh73a0-lcdc", .data = &shmob_arm_config, }, + { /* sentinel */ } +}; + static struct platform_driver shmob_drm_platform_driver = { .probe = shmob_drm_probe, .remove_new = shmob_drm_remove, .driver = { .name = "shmob-drm", - .pm = pm_sleep_ptr(&shmob_drm_pm_ops), + .of_match_table = of_match_ptr(shmob_drm_of_table), + .pm = &shmob_drm_pm_ops, }, }; diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.h b/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.h index 4964ddd5ab74..088ac5381e91 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.h +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.h @@ -20,23 +20,33 @@ struct clk; struct device; struct drm_device; +struct shmob_drm_config { + enum shmob_drm_clk_source clk_source; + unsigned int clk_div; +}; + struct shmob_drm_device { struct device *dev; const struct shmob_drm_platform_data *pdata; + struct shmob_drm_config config; void __iomem *mmio; struct clk *clock; u32 lddckr; - u32 ldmt1r; unsigned int irq; spinlock_t irq_lock; /* Protects hardware LDINTR register */ - struct drm_device *ddev; + struct drm_device ddev; struct shmob_drm_crtc crtc; - struct shmob_drm_encoder encoder; - struct shmob_drm_connector connector; + struct drm_encoder encoder; + struct drm_connector *connector; }; +static inline struct shmob_drm_device *to_shmob_device(struct drm_device *dev) +{ + return container_of(dev, struct shmob_drm_device, ddev); +} + #endif /* __SHMOB_DRM_DRV_H__ */ diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c b/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c index 99381cc0abf3..4202ab00fb0c 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c @@ -7,6 +7,7 @@ * Laurent Pinchart 
(laurent.pinchart@ideasonboard.com) */ +#include <drm/drm_atomic_helper.h> #include <drm/drm_crtc.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_fourcc.h> @@ -17,6 +18,7 @@ #include "shmob_drm_crtc.h" #include "shmob_drm_drv.h" #include "shmob_drm_kms.h" +#include "shmob_drm_plane.h" #include "shmob_drm_regs.h" /* ----------------------------------------------------------------------------- @@ -27,53 +29,73 @@ static const struct shmob_drm_format_info shmob_drm_format_infos[] = { { .fourcc = DRM_FORMAT_RGB565, .bpp = 16, - .yuv = false, .lddfr = LDDFR_PKF_RGB16, + .ldddsr = LDDDSR_LS | LDDDSR_WS, + .ldbbsifr = LDBBSIFR_AL_1 | LDBBSIFR_SWPL | LDBBSIFR_SWPW | + LDBBSIFR_RY | LDBBSIFR_RPKF_RGB16, }, { .fourcc = DRM_FORMAT_RGB888, .bpp = 24, - .yuv = false, .lddfr = LDDFR_PKF_RGB24, + .ldddsr = LDDDSR_LS | LDDDSR_WS | LDDDSR_BS, + .ldbbsifr = LDBBSIFR_AL_1 | LDBBSIFR_SWPL | LDBBSIFR_SWPW | + LDBBSIFR_SWPB | LDBBSIFR_RY | LDBBSIFR_RPKF_RGB24, }, { .fourcc = DRM_FORMAT_ARGB8888, .bpp = 32, - .yuv = false, .lddfr = LDDFR_PKF_ARGB32, + .ldddsr = LDDDSR_LS, + .ldbbsifr = LDBBSIFR_AL_PK | LDBBSIFR_SWPL | LDBBSIFR_RY | + LDBBSIFR_RPKF_ARGB32, }, { .fourcc = DRM_FORMAT_XRGB8888, .bpp = 32, - .yuv = false, .lddfr = LDDFR_PKF_ARGB32, + .ldddsr = LDDDSR_LS, + .ldbbsifr = LDBBSIFR_AL_1 | LDBBSIFR_SWPL | LDBBSIFR_RY | + LDBBSIFR_RPKF_ARGB32, }, { .fourcc = DRM_FORMAT_NV12, .bpp = 12, - .yuv = true, .lddfr = LDDFR_CC | LDDFR_YF_420, + .ldddsr = LDDDSR_LS | LDDDSR_WS | LDDDSR_BS, + .ldbbsifr = LDBBSIFR_AL_1 | LDBBSIFR_SWPL | LDBBSIFR_SWPW | + LDBBSIFR_SWPB | LDBBSIFR_CHRR_420, }, { .fourcc = DRM_FORMAT_NV21, .bpp = 12, - .yuv = true, .lddfr = LDDFR_CC | LDDFR_YF_420, + .ldddsr = LDDDSR_LS | LDDDSR_WS, + .ldbbsifr = LDBBSIFR_AL_1 | LDBBSIFR_SWPL | LDBBSIFR_SWPW | + LDBBSIFR_CHRR_420, }, { .fourcc = DRM_FORMAT_NV16, .bpp = 16, - .yuv = true, .lddfr = LDDFR_CC | LDDFR_YF_422, + .ldddsr = LDDDSR_LS | LDDDSR_WS | LDDDSR_BS, + .ldbbsifr = LDBBSIFR_AL_1 | LDBBSIFR_SWPL | LDBBSIFR_SWPW | + LDBBSIFR_SWPB | LDBBSIFR_CHRR_422, }, { .fourcc = DRM_FORMAT_NV61, .bpp = 16, - .yuv = true, .lddfr = LDDFR_CC | LDDFR_YF_422, + .ldddsr = LDDDSR_LS | LDDDSR_WS, + .ldbbsifr = LDBBSIFR_AL_1 | LDBBSIFR_SWPL | LDBBSIFR_SWPW | + LDBBSIFR_CHRR_422, }, { .fourcc = DRM_FORMAT_NV24, .bpp = 24, - .yuv = true, .lddfr = LDDFR_CC | LDDFR_YF_444, + .ldddsr = LDDDSR_LS | LDDDSR_WS | LDDDSR_BS, + .ldbbsifr = LDBBSIFR_AL_1 | LDBBSIFR_SWPL | LDBBSIFR_SWPW | + LDBBSIFR_SWPB | LDBBSIFR_CHRR_444, }, { .fourcc = DRM_FORMAT_NV42, .bpp = 24, - .yuv = true, .lddfr = LDDFR_CC | LDDFR_YF_444, + .ldddsr = LDDDSR_LS | LDDDSR_WS, + .ldbbsifr = LDBBSIFR_AL_1 | LDBBSIFR_SWPL | LDBBSIFR_SWPW | + LDBBSIFR_CHRR_444, }, }; @@ -112,7 +134,7 @@ shmob_drm_fb_create(struct drm_device *dev, struct drm_file *file_priv, return ERR_PTR(-EINVAL); } - if (format->yuv) { + if (shmob_drm_format_is_yuv(format)) { unsigned int chroma_cpp = format->bpp == 24 ? 
2 : 1; if (mode_cmd->pitches[1] != mode_cmd->pitches[0] * chroma_cpp) { @@ -127,29 +149,40 @@ shmob_drm_fb_create(struct drm_device *dev, struct drm_file *file_priv, static const struct drm_mode_config_funcs shmob_drm_mode_config_funcs = { .fb_create = shmob_drm_fb_create, + .atomic_check = drm_atomic_helper_check, + .atomic_commit = drm_atomic_helper_commit, }; int shmob_drm_modeset_init(struct shmob_drm_device *sdev) { + struct drm_device *dev = &sdev->ddev; int ret; - ret = drmm_mode_config_init(sdev->ddev); + ret = drmm_mode_config_init(dev); if (ret) return ret; - shmob_drm_crtc_create(sdev); - shmob_drm_encoder_create(sdev); - shmob_drm_connector_create(sdev, &sdev->encoder.encoder); + ret = shmob_drm_crtc_create(sdev); + if (ret < 0) + return ret; + + ret = shmob_drm_encoder_create(sdev); + if (ret < 0) + return ret; + + ret = shmob_drm_connector_create(sdev, &sdev->encoder); + if (ret < 0) + return ret; - drm_kms_helper_poll_init(sdev->ddev); + drm_mode_config_reset(dev); - sdev->ddev->mode_config.min_width = 0; - sdev->ddev->mode_config.min_height = 0; - sdev->ddev->mode_config.max_width = 4095; - sdev->ddev->mode_config.max_height = 4095; - sdev->ddev->mode_config.funcs = &shmob_drm_mode_config_funcs; + drm_kms_helper_poll_init(dev); - drm_helper_disable_unused_functions(sdev->ddev); + sdev->ddev.mode_config.min_width = 0; + sdev->ddev.mode_config.min_height = 0; + sdev->ddev.mode_config.max_width = 4095; + sdev->ddev.mode_config.max_height = 4095; + sdev->ddev.mode_config.funcs = &shmob_drm_mode_config_funcs; return 0; } diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.h b/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.h index 0347b1fd2338..590162c3db20 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.h +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.h @@ -17,11 +17,14 @@ struct shmob_drm_device; struct shmob_drm_format_info { u32 fourcc; - unsigned int bpp; - bool yuv; - u32 lddfr; + u32 lddfr; /* LCD Data Format Register */ + u16 ldbbsifr; /* CHn Source Image Format Register low bits */ + u8 ldddsr; /* LCDC Input Image Data Swap Register low bits */ + u8 bpp; }; +#define shmob_drm_format_is_yuv(format) ((format)->lddfr & LDDFR_CC) + const struct shmob_drm_format_info *shmob_drm_format_info(u32 fourcc); int shmob_drm_modeset_init(struct shmob_drm_device *sdev); diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.c b/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.c index 850986cee848..8f9a728affde 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.c +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.c @@ -7,11 +7,14 @@ * Laurent Pinchart (laurent.pinchart@ideasonboard.com) */ +#include <drm/drm_atomic.h> +#include <drm/drm_atomic_helper.h> #include <drm/drm_crtc.h> #include <drm/drm_fb_dma_helper.h> #include <drm/drm_fourcc.h> #include <drm/drm_framebuffer.h> #include <drm/drm_gem_dma_helper.h> +#include <drm/drm_plane_helper.h> #include "shmob_drm_drv.h" #include "shmob_drm_kms.h" @@ -19,102 +22,84 @@ #include "shmob_drm_regs.h" struct shmob_drm_plane { - struct drm_plane plane; + struct drm_plane base; unsigned int index; - unsigned int alpha; +}; + +struct shmob_drm_plane_state { + struct drm_plane_state base; const struct shmob_drm_format_info *format; - unsigned long dma[2]; - - unsigned int src_x; - unsigned int src_y; - unsigned int crtc_x; - unsigned int crtc_y; - unsigned int crtc_w; - unsigned int crtc_h; + u32 dma[2]; }; -#define to_shmob_plane(p) container_of(p, struct shmob_drm_plane, plane) +static inline struct 
shmob_drm_plane *to_shmob_plane(struct drm_plane *plane) +{ + return container_of(plane, struct shmob_drm_plane, base); +} + +static inline struct shmob_drm_plane_state *to_shmob_plane_state(struct drm_plane_state *state) +{ + return container_of(state, struct shmob_drm_plane_state, base); +} -static void shmob_drm_plane_compute_base(struct shmob_drm_plane *splane, - struct drm_framebuffer *fb, - int x, int y) +static void shmob_drm_plane_compute_base(struct shmob_drm_plane_state *sstate) { + struct drm_framebuffer *fb = sstate->base.fb; + unsigned int x = sstate->base.src_x >> 16; + unsigned int y = sstate->base.src_y >> 16; struct drm_gem_dma_object *gem; unsigned int bpp; - bpp = splane->format->yuv ? 8 : splane->format->bpp; + bpp = shmob_drm_format_is_yuv(sstate->format) ? 8 : sstate->format->bpp; gem = drm_fb_dma_get_gem_obj(fb, 0); - splane->dma[0] = gem->dma_addr + fb->offsets[0] + sstate->dma[0] = gem->dma_addr + fb->offsets[0] + y * fb->pitches[0] + x * bpp / 8; - if (splane->format->yuv) { - bpp = splane->format->bpp - 8; + if (shmob_drm_format_is_yuv(sstate->format)) { + bpp = sstate->format->bpp - 8; gem = drm_fb_dma_get_gem_obj(fb, 1); - splane->dma[1] = gem->dma_addr + fb->offsets[1] + sstate->dma[1] = gem->dma_addr + fb->offsets[1] + y / (bpp == 4 ? 2 : 1) * fb->pitches[1] + x * (bpp == 16 ? 2 : 1); } } -static void __shmob_drm_plane_setup(struct shmob_drm_plane *splane, - struct drm_framebuffer *fb) +static void shmob_drm_primary_plane_setup(struct shmob_drm_plane *splane, + struct drm_plane_state *state) { - struct shmob_drm_device *sdev = splane->plane.dev->dev_private; + struct shmob_drm_plane_state *sstate = to_shmob_plane_state(state); + struct shmob_drm_device *sdev = to_shmob_device(splane->base.dev); + struct drm_framebuffer *fb = state->fb; + + /* TODO: Handle YUV colorspaces. Hardcode REC709 for now. */ + lcdc_write(sdev, LDDFR, sstate->format->lddfr | LDDFR_CF1); + lcdc_write(sdev, LDMLSR, fb->pitches[0]); + + /* Word and long word swap. 
*/ + lcdc_write(sdev, LDDDSR, sstate->format->ldddsr); + + lcdc_write_mirror(sdev, LDSA1R, sstate->dma[0]); + if (shmob_drm_format_is_yuv(sstate->format)) + lcdc_write_mirror(sdev, LDSA2R, sstate->dma[1]); + + lcdc_write(sdev, LDRCNTR, lcdc_read(sdev, LDRCNTR) ^ LDRCNTR_MRS); +} + +static void shmob_drm_overlay_plane_setup(struct shmob_drm_plane *splane, + struct drm_plane_state *state) +{ + struct shmob_drm_plane_state *sstate = to_shmob_plane_state(state); + struct shmob_drm_device *sdev = to_shmob_device(splane->base.dev); + struct drm_framebuffer *fb = state->fb; u32 format; /* TODO: Support ROP3 mode */ - format = LDBBSIFR_EN | (splane->alpha << LDBBSIFR_LAY_SHIFT); - - switch (splane->format->fourcc) { - case DRM_FORMAT_RGB565: - case DRM_FORMAT_NV21: - case DRM_FORMAT_NV61: - case DRM_FORMAT_NV42: - format |= LDBBSIFR_SWPL | LDBBSIFR_SWPW; - break; - case DRM_FORMAT_RGB888: - case DRM_FORMAT_NV12: - case DRM_FORMAT_NV16: - case DRM_FORMAT_NV24: - format |= LDBBSIFR_SWPL | LDBBSIFR_SWPW | LDBBSIFR_SWPB; - break; - case DRM_FORMAT_ARGB8888: - case DRM_FORMAT_XRGB8888: - default: - format |= LDBBSIFR_SWPL; - break; - } - - switch (splane->format->fourcc) { - case DRM_FORMAT_RGB565: - format |= LDBBSIFR_AL_1 | LDBBSIFR_RY | LDBBSIFR_RPKF_RGB16; - break; - case DRM_FORMAT_RGB888: - format |= LDBBSIFR_AL_1 | LDBBSIFR_RY | LDBBSIFR_RPKF_RGB24; - break; - case DRM_FORMAT_ARGB8888: - format |= LDBBSIFR_AL_PK | LDBBSIFR_RY | LDDFR_PKF_ARGB32; - break; - case DRM_FORMAT_XRGB8888: - format |= LDBBSIFR_AL_1 | LDBBSIFR_RY | LDDFR_PKF_ARGB32; - break; - case DRM_FORMAT_NV12: - case DRM_FORMAT_NV21: - format |= LDBBSIFR_AL_1 | LDBBSIFR_CHRR_420; - break; - case DRM_FORMAT_NV16: - case DRM_FORMAT_NV61: - format |= LDBBSIFR_AL_1 | LDBBSIFR_CHRR_422; - break; - case DRM_FORMAT_NV24: - case DRM_FORMAT_NV42: - format |= LDBBSIFR_AL_1 | LDBBSIFR_CHRR_444; - break; - } + format = LDBBSIFR_EN | ((state->alpha >> 8) << LDBBSIFR_LAY_SHIFT) | + sstate->format->ldbbsifr; #define plane_reg_dump(sdev, splane, reg) \ - dev_dbg(sdev->ddev->dev, "%s(%u): %s 0x%08x 0x%08x\n", __func__, \ + dev_dbg(sdev->ddev.dev, "%s(%u): %s 0x%08x 0x%08x\n", __func__, \ splane->index, #reg, \ lcdc_read(sdev, reg(splane->index)), \ lcdc_read(sdev, reg(splane->index) + LCDC_SIDE_B_OFFSET)) @@ -127,29 +112,27 @@ static void __shmob_drm_plane_setup(struct shmob_drm_plane *splane, plane_reg_dump(sdev, splane, LDBnBSACR); lcdc_write(sdev, LDBCR, LDBCR_UPC(splane->index)); - dev_dbg(sdev->ddev->dev, "%s(%u): %s 0x%08x\n", __func__, splane->index, + dev_dbg(sdev->ddev.dev, "%s(%u): %s 0x%08x\n", __func__, splane->index, "LDBCR", lcdc_read(sdev, LDBCR)); lcdc_write(sdev, LDBnBSIFR(splane->index), format); lcdc_write(sdev, LDBnBSSZR(splane->index), - (splane->crtc_h << LDBBSSZR_BVSS_SHIFT) | - (splane->crtc_w << LDBBSSZR_BHSS_SHIFT)); + (state->crtc_h << LDBBSSZR_BVSS_SHIFT) | + (state->crtc_w << LDBBSSZR_BHSS_SHIFT)); lcdc_write(sdev, LDBnBLOCR(splane->index), - (splane->crtc_y << LDBBLOCR_CVLC_SHIFT) | - (splane->crtc_x << LDBBLOCR_CHLC_SHIFT)); + (state->crtc_y << LDBBLOCR_CVLC_SHIFT) | + (state->crtc_x << LDBBLOCR_CHLC_SHIFT)); lcdc_write(sdev, LDBnBSMWR(splane->index), fb->pitches[0] << LDBBSMWR_BSMW_SHIFT); - shmob_drm_plane_compute_base(splane, fb, splane->src_x, splane->src_y); - - lcdc_write(sdev, LDBnBSAYR(splane->index), splane->dma[0]); - if (splane->format->yuv) - lcdc_write(sdev, LDBnBSACR(splane->index), splane->dma[1]); + lcdc_write(sdev, LDBnBSAYR(splane->index), sstate->dma[0]); + if 
(shmob_drm_format_is_yuv(sstate->format)) + lcdc_write(sdev, LDBnBSACR(splane->index), sstate->dma[1]); lcdc_write(sdev, LDBCR, LDBCR_UPF(splane->index) | LDBCR_UPD(splane->index)); - dev_dbg(sdev->ddev->dev, "%s(%u): %s 0x%08x\n", __func__, splane->index, + dev_dbg(sdev->ddev.dev, "%s(%u): %s 0x%08x\n", __func__, splane->index, "LDBCR", lcdc_read(sdev, LDBCR)); plane_reg_dump(sdev, splane, LDBnBSIFR); @@ -160,75 +143,143 @@ static void __shmob_drm_plane_setup(struct shmob_drm_plane *splane, plane_reg_dump(sdev, splane, LDBnBSACR); } -void shmob_drm_plane_setup(struct drm_plane *plane) +static int shmob_drm_plane_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state) { - struct shmob_drm_plane *splane = to_shmob_plane(plane); + struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); + struct shmob_drm_plane_state *sstate = to_shmob_plane_state(new_plane_state); + struct drm_crtc_state *crtc_state; + bool is_primary = plane->type == DRM_PLANE_TYPE_PRIMARY; + int ret; - if (plane->fb == NULL) - return; + if (!new_plane_state->crtc) { + /* + * The visible field is not reset by the DRM core but only + * updated by drm_atomic_helper_check_plane_state(), set it + * manually. + */ + new_plane_state->visible = false; + sstate->format = NULL; + return 0; + } - __shmob_drm_plane_setup(splane, plane->fb); -} + crtc_state = drm_atomic_get_crtc_state(state, new_plane_state->crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); -static int -shmob_drm_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, - struct drm_framebuffer *fb, int crtc_x, int crtc_y, - unsigned int crtc_w, unsigned int crtc_h, - uint32_t src_x, uint32_t src_y, - uint32_t src_w, uint32_t src_h, - struct drm_modeset_acquire_ctx *ctx) -{ - struct shmob_drm_plane *splane = to_shmob_plane(plane); - struct shmob_drm_device *sdev = plane->dev->dev_private; - const struct shmob_drm_format_info *format; + ret = drm_atomic_helper_check_plane_state(new_plane_state, crtc_state, + DRM_PLANE_NO_SCALING, + DRM_PLANE_NO_SCALING, + !is_primary, true); + if (ret < 0) + return ret; - format = shmob_drm_format_info(fb->format->format); - if (format == NULL) { - dev_dbg(sdev->dev, "update_plane: unsupported format %08x\n", - fb->format->format); - return -EINVAL; + if (!new_plane_state->visible) { + sstate->format = NULL; + return 0; } - if (src_w >> 16 != crtc_w || src_h >> 16 != crtc_h) { - dev_dbg(sdev->dev, "%s: scaling not supported\n", __func__); + sstate->format = shmob_drm_format_info(new_plane_state->fb->format->format); + if (!sstate->format) { + dev_dbg(plane->dev->dev, + "plane_atomic_check: unsupported format %p4cc\n", + &new_plane_state->fb->format->format); return -EINVAL; } - splane->format = format; + shmob_drm_plane_compute_base(sstate); - splane->src_x = src_x >> 16; - splane->src_y = src_y >> 16; - splane->crtc_x = crtc_x; - splane->crtc_y = crtc_y; - splane->crtc_w = crtc_w; - splane->crtc_h = crtc_h; - - __shmob_drm_plane_setup(splane, fb); return 0; } -static int shmob_drm_plane_disable(struct drm_plane *plane, - struct drm_modeset_acquire_ctx *ctx) +static void shmob_drm_plane_atomic_update(struct drm_plane *plane, + struct drm_atomic_state *state) { + struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); struct shmob_drm_plane *splane = to_shmob_plane(plane); - struct shmob_drm_device *sdev = plane->dev->dev_private; - splane->format = NULL; + if (!new_plane_state->visible) + return; + if (plane->type == DRM_PLANE_TYPE_PRIMARY) + 
shmob_drm_primary_plane_setup(splane, new_plane_state); + else + shmob_drm_overlay_plane_setup(splane, new_plane_state); +} + +static void shmob_drm_plane_atomic_disable(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(state, plane); + struct shmob_drm_device *sdev = to_shmob_device(plane->dev); + struct shmob_drm_plane *splane = to_shmob_plane(plane); + + if (!old_state->crtc) + return; + + if (plane->type != DRM_PLANE_TYPE_OVERLAY) + return; + + lcdc_write(sdev, LDBCR, LDBCR_UPC(splane->index)); lcdc_write(sdev, LDBnBSIFR(splane->index), 0); - return 0; + lcdc_write(sdev, LDBCR, + LDBCR_UPF(splane->index) | LDBCR_UPD(splane->index)); } -static void shmob_drm_plane_destroy(struct drm_plane *plane) +static struct drm_plane_state * +shmob_drm_plane_atomic_duplicate_state(struct drm_plane *plane) { - drm_plane_force_disable(plane); - drm_plane_cleanup(plane); + struct shmob_drm_plane_state *state; + struct shmob_drm_plane_state *copy; + + if (WARN_ON(!plane->state)) + return NULL; + + state = to_shmob_plane_state(plane->state); + copy = kmemdup(state, sizeof(*state), GFP_KERNEL); + if (copy == NULL) + return NULL; + + __drm_atomic_helper_plane_duplicate_state(plane, &copy->base); + + return &copy->base; } +static void shmob_drm_plane_atomic_destroy_state(struct drm_plane *plane, + struct drm_plane_state *state) +{ + __drm_atomic_helper_plane_destroy_state(state); + kfree(to_shmob_plane_state(state)); +} + +static void shmob_drm_plane_reset(struct drm_plane *plane) +{ + struct shmob_drm_plane_state *state; + + if (plane->state) { + shmob_drm_plane_atomic_destroy_state(plane, plane->state); + plane->state = NULL; + } + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (state == NULL) + return; + + __drm_atomic_helper_plane_reset(plane, &state->base); +} + +static const struct drm_plane_helper_funcs shmob_drm_plane_helper_funcs = { + .atomic_check = shmob_drm_plane_atomic_check, + .atomic_update = shmob_drm_plane_atomic_update, + .atomic_disable = shmob_drm_plane_atomic_disable, +}; + static const struct drm_plane_funcs shmob_drm_plane_funcs = { - .update_plane = shmob_drm_plane_update, - .disable_plane = shmob_drm_plane_disable, - .destroy = shmob_drm_plane_destroy, + .update_plane = drm_atomic_helper_update_plane, + .disable_plane = drm_atomic_helper_disable_plane, + .reset = shmob_drm_plane_reset, + .atomic_duplicate_state = shmob_drm_plane_atomic_duplicate_state, + .atomic_destroy_state = shmob_drm_plane_atomic_destroy_state, }; static const uint32_t formats[] = { @@ -244,22 +295,23 @@ static const uint32_t formats[] = { DRM_FORMAT_NV42, }; -int shmob_drm_plane_create(struct shmob_drm_device *sdev, unsigned int index) +struct drm_plane *shmob_drm_plane_create(struct shmob_drm_device *sdev, + enum drm_plane_type type, + unsigned int index) { struct shmob_drm_plane *splane; - int ret; - splane = devm_kzalloc(sdev->dev, sizeof(*splane), GFP_KERNEL); - if (splane == NULL) - return -ENOMEM; + splane = drmm_universal_plane_alloc(&sdev->ddev, + struct shmob_drm_plane, base, 1, + &shmob_drm_plane_funcs, formats, + ARRAY_SIZE(formats), NULL, type, + NULL); + if (IS_ERR(splane)) + return ERR_CAST(splane); splane->index = index; - splane->alpha = 255; - ret = drm_universal_plane_init(sdev->ddev, &splane->plane, 1, - &shmob_drm_plane_funcs, - formats, ARRAY_SIZE(formats), NULL, - DRM_PLANE_TYPE_OVERLAY, NULL); + drm_plane_helper_add(&splane->base, &shmob_drm_plane_helper_funcs); - return ret; + return &splane->base; }
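The plane conversion above is the canonical shape for a driver that subclasses atomic plane state: the driver state embeds struct drm_plane_state as its first member, .atomic_duplicate_state copies the whole container with kmemdup() so the private fields (here the resolved format pointer and DMA addresses) travel with the duplicated state, and .reset and .atomic_destroy_state route through the __drm_atomic_helper_plane_*() helpers. A minimal sketch of the pattern follows; the names my_plane_state, to_my_state and my_duplicate_state are illustrative only and not part of this patch.

        #include <drm/drm_atomic_helper.h>
        #include <linux/slab.h>

        struct my_plane_state {
                struct drm_plane_state base;    /* core state, must be first */
                u32 cookie;                     /* driver-private per-state data */
        };

        static inline struct my_plane_state *to_my_state(struct drm_plane_state *s)
        {
                return container_of(s, struct my_plane_state, base);
        }

        static struct drm_plane_state *my_duplicate_state(struct drm_plane *plane)
        {
                struct my_plane_state *copy;

                if (WARN_ON(!plane->state))
                        return NULL;

                /* Copy the container so private fields follow the new state. */
                copy = kmemdup(to_my_state(plane->state), sizeof(*copy), GFP_KERNEL);
                if (!copy)
                        return NULL;

                __drm_atomic_helper_plane_duplicate_state(plane, &copy->base);
                return &copy->base;
        }

The matching destroy callback undoes this in reverse, calling __drm_atomic_helper_plane_destroy_state() on the base and then kfree() on the container, exactly as shmob_drm_plane_atomic_destroy_state() does above.

diff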
--git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.h b/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.h index e72b21a4288f..dcfddd605899 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.h +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.h @@ -13,7 +13,8 @@ struct drm_plane; struct shmob_drm_device; -int shmob_drm_plane_create(struct shmob_drm_device *sdev, unsigned int index); -void shmob_drm_plane_setup(struct drm_plane *plane); +struct drm_plane *shmob_drm_plane_create(struct shmob_drm_device *sdev, + enum drm_plane_type type, + unsigned int index); #endif /* __SHMOB_DRM_PLANE_H__ */ diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c index 32f0857aec9f..e0174f82e353 100644 --- a/drivers/gpu/drm/solomon/ssd130x.c +++ b/drivers/gpu/drm/solomon/ssd130x.c @@ -910,7 +910,7 @@ static int ssd132x_primary_plane_atomic_check(struct drm_plane *plane, struct drm_plane_state *plane_state = drm_atomic_get_new_plane_state(state, plane); struct ssd130x_plane_state *ssd130x_state = to_ssd130x_plane_state(plane_state); struct drm_crtc *crtc = plane_state->crtc; - struct drm_crtc_state *crtc_state; + struct drm_crtc_state *crtc_state = NULL; const struct drm_format_info *fi; unsigned int pitch; int ret; diff --git a/drivers/gpu/drm/vc4/tests/vc4_mock_crtc.c b/drivers/gpu/drm/vc4/tests/vc4_mock_crtc.c index 5d12d7beef0e..ade3309ae042 100644 --- a/drivers/gpu/drm/vc4/tests/vc4_mock_crtc.c +++ b/drivers/gpu/drm/vc4/tests/vc4_mock_crtc.c @@ -26,7 +26,7 @@ struct vc4_dummy_crtc *vc4_mock_pv(struct kunit *test, struct vc4_crtc *vc4_crtc; int ret; - dummy_crtc = kunit_kzalloc(test, sizeof(*dummy_crtc), GFP_KERNEL); + dummy_crtc = drmm_kzalloc(drm, sizeof(*dummy_crtc), GFP_KERNEL); KUNIT_ASSERT_NOT_NULL(test, dummy_crtc); vc4_crtc = &dummy_crtc->crtc; diff --git a/drivers/gpu/drm/vc4/tests/vc4_mock_output.c b/drivers/gpu/drm/vc4/tests/vc4_mock_output.c index 6e11fcc9ef45..e70d7c3076ac 100644 --- a/drivers/gpu/drm/vc4/tests/vc4_mock_output.c +++ b/drivers/gpu/drm/vc4/tests/vc4_mock_output.c @@ -32,7 +32,7 @@ struct vc4_dummy_output *vc4_dummy_output(struct kunit *test, struct drm_encoder *enc; int ret; - dummy_output = kunit_kzalloc(test, sizeof(*dummy_output), GFP_KERNEL); + dummy_output = drmm_kzalloc(drm, sizeof(*dummy_output), GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dummy_output); dummy_output->encoder.type = vc4_encoder_type; diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c index 51aab662050b..e905734c26a0 100644 --- a/drivers/i2c/busses/i2c-bcm-iproc.c +++ b/drivers/i2c/busses/i2c-bcm-iproc.c @@ -316,26 +316,44 @@ static void bcm_iproc_i2c_slave_init( iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, val); } -static void bcm_iproc_i2c_check_slave_status( - struct bcm_iproc_i2c_dev *iproc_i2c) +static bool bcm_iproc_i2c_check_slave_status + (struct bcm_iproc_i2c_dev *iproc_i2c, u32 status) { u32 val; + bool recover = false; - val = iproc_i2c_rd_reg(iproc_i2c, S_CMD_OFFSET); - /* status is valid only when START_BUSY is cleared after it was set */ - if (val & BIT(S_CMD_START_BUSY_SHIFT)) - return; + /* check slave transmit status only if slave is transmitting */ + if (!iproc_i2c->slave_rx_only) { + val = iproc_i2c_rd_reg(iproc_i2c, S_CMD_OFFSET); + /* status is valid only when START_BUSY is cleared */ + if (!(val & BIT(S_CMD_START_BUSY_SHIFT))) { + val = (val >> S_CMD_STATUS_SHIFT) & S_CMD_STATUS_MASK; + if (val == S_CMD_STATUS_TIMEOUT || + val == S_CMD_STATUS_MASTER_ABORT) { + dev_warn(iproc_i2c->device, + (val == 
S_CMD_STATUS_TIMEOUT) ? + "slave random stretch time timeout\n" : + "Master aborted read transaction\n"); + recover = true; + } + } + } + + /* RX_EVENT is not valid when START_BUSY is set */ + if ((status & BIT(IS_S_RX_EVENT_SHIFT)) && + (status & BIT(IS_S_START_BUSY_SHIFT))) { + dev_warn(iproc_i2c->device, "Slave aborted read transaction\n"); + recover = true; + } - val = (val >> S_CMD_STATUS_SHIFT) & S_CMD_STATUS_MASK; - if (val == S_CMD_STATUS_TIMEOUT || val == S_CMD_STATUS_MASTER_ABORT) { - dev_err(iproc_i2c->device, (val == S_CMD_STATUS_TIMEOUT) ? - "slave random stretch time timeout\n" : - "Master aborted read transaction\n"); + if (recover) { /* re-initialize i2c for recovery */ bcm_iproc_i2c_enable_disable(iproc_i2c, false); bcm_iproc_i2c_slave_init(iproc_i2c, true); bcm_iproc_i2c_enable_disable(iproc_i2c, true); } + + return recover; } static void bcm_iproc_i2c_slave_read(struct bcm_iproc_i2c_dev *iproc_i2c) @@ -420,48 +438,6 @@ static bool bcm_iproc_i2c_slave_isr(struct bcm_iproc_i2c_dev *iproc_i2c, u32 val; u8 value; - /* - * Slave events in case of master-write, master-write-read and, - * master-read - * - * Master-write : only IS_S_RX_EVENT_SHIFT event - * Master-write-read: both IS_S_RX_EVENT_SHIFT and IS_S_RD_EVENT_SHIFT - * events - * Master-read : both IS_S_RX_EVENT_SHIFT and IS_S_RD_EVENT_SHIFT - * events or only IS_S_RD_EVENT_SHIFT - * - * iproc has a slave rx fifo size of 64 bytes. Rx fifo full interrupt - * (IS_S_RX_FIFO_FULL_SHIFT) will be generated when RX fifo becomes - * full. This can happen if Master issues write requests of more than - * 64 bytes. - */ - if (status & BIT(IS_S_RX_EVENT_SHIFT) || - status & BIT(IS_S_RD_EVENT_SHIFT) || - status & BIT(IS_S_RX_FIFO_FULL_SHIFT)) { - /* disable slave interrupts */ - val = iproc_i2c_rd_reg(iproc_i2c, IE_OFFSET); - val &= ~iproc_i2c->slave_int_mask; - iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, val); - - if (status & BIT(IS_S_RD_EVENT_SHIFT)) - /* Master-write-read request */ - iproc_i2c->slave_rx_only = false; - else - /* Master-write request only */ - iproc_i2c->slave_rx_only = true; - - /* schedule tasklet to read data later */ - tasklet_schedule(&iproc_i2c->slave_rx_tasklet); - - /* - * clear only IS_S_RX_EVENT_SHIFT and - * IS_S_RX_FIFO_FULL_SHIFT interrupt. 
- */ - val = BIT(IS_S_RX_EVENT_SHIFT); - if (status & BIT(IS_S_RX_FIFO_FULL_SHIFT)) - val |= BIT(IS_S_RX_FIFO_FULL_SHIFT); - iproc_i2c_wr_reg(iproc_i2c, IS_OFFSET, val); - } if (status & BIT(IS_S_TX_UNDERRUN_SHIFT)) { iproc_i2c->tx_underrun++; @@ -493,8 +469,9 @@ static bool bcm_iproc_i2c_slave_isr(struct bcm_iproc_i2c_dev *iproc_i2c, * less than PKT_LENGTH bytes were output on the SMBUS */ iproc_i2c->slave_int_mask &= ~BIT(IE_S_TX_UNDERRUN_SHIFT); - iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, - iproc_i2c->slave_int_mask); + val = iproc_i2c_rd_reg(iproc_i2c, IE_OFFSET); + val &= ~BIT(IE_S_TX_UNDERRUN_SHIFT); + iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, val); /* End of SMBUS for Master Read */ val = BIT(S_TX_WR_STATUS_SHIFT); @@ -515,9 +492,49 @@ static bool bcm_iproc_i2c_slave_isr(struct bcm_iproc_i2c_dev *iproc_i2c, BIT(IS_S_START_BUSY_SHIFT)); } - /* check slave transmit status only if slave is transmitting */ - if (!iproc_i2c->slave_rx_only) - bcm_iproc_i2c_check_slave_status(iproc_i2c); + /* if the controller has been reset, immediately return from the ISR */ + if (bcm_iproc_i2c_check_slave_status(iproc_i2c, status)) + return true; + + /* + * Slave events in case of master-write, master-write-read and, + * master-read + * + * Master-write : only IS_S_RX_EVENT_SHIFT event + * Master-write-read: both IS_S_RX_EVENT_SHIFT and IS_S_RD_EVENT_SHIFT + * events + * Master-read : both IS_S_RX_EVENT_SHIFT and IS_S_RD_EVENT_SHIFT + * events or only IS_S_RD_EVENT_SHIFT + * + * iproc has a slave rx fifo size of 64 bytes. Rx fifo full interrupt + * (IS_S_RX_FIFO_FULL_SHIFT) will be generated when RX fifo becomes + * full. This can happen if Master issues write requests of more than + * 64 bytes. + */ + if (status & BIT(IS_S_RX_EVENT_SHIFT) || + status & BIT(IS_S_RD_EVENT_SHIFT) || + status & BIT(IS_S_RX_FIFO_FULL_SHIFT)) { + /* disable slave interrupts */ + val = iproc_i2c_rd_reg(iproc_i2c, IE_OFFSET); + val &= ~iproc_i2c->slave_int_mask; + iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, val); + + if (status & BIT(IS_S_RD_EVENT_SHIFT)) + /* Master-write-read request */ + iproc_i2c->slave_rx_only = false; + else + /* Master-write request only */ + iproc_i2c->slave_rx_only = true; + + /* schedule tasklet to read data later */ + tasklet_schedule(&iproc_i2c->slave_rx_tasklet); + + /* clear IS_S_RX_FIFO_FULL_SHIFT interrupt */ + if (status & BIT(IS_S_RX_FIFO_FULL_SHIFT)) { + val = BIT(IS_S_RX_FIFO_FULL_SHIFT); + iproc_i2c_wr_reg(iproc_i2c, IS_OFFSET, val); + } + } return true; } diff --git a/drivers/i2c/busses/i2c-cp2615.c b/drivers/i2c/busses/i2c-cp2615.c index 20f8f7c9a8cd..cf3747d87034 100644 --- a/drivers/i2c/busses/i2c-cp2615.c +++ b/drivers/i2c/busses/i2c-cp2615.c @@ -85,7 +85,7 @@ static int cp2615_init_iop_msg(struct cp2615_iop_msg *ret, enum cp2615_iop_msg_t if (!ret) return -EINVAL; - ret->preamble = 0x2A2A; + ret->preamble = htons(0x2A2AU); ret->length = htons(data_len + 6); ret->msg = htons(msg); if (data && data_len) diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index ca1035e010c7..85dbd0eb5392 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -519,10 +519,16 @@ i2c_dw_xfer_msg(struct dw_i2c_dev *dev) /* * Because we don't know the buffer length in the - * I2C_FUNC_SMBUS_BLOCK_DATA case, we can't stop - * the transaction here. + * I2C_FUNC_SMBUS_BLOCK_DATA case, we can't stop the + * transaction here. 
Also disable the TX_EMPTY IRQ + * while waiting for the data length byte to avoid the + * bogus interrupts flood. */ - if (buf_len > 0 || flags & I2C_M_RECV_LEN) { + if (flags & I2C_M_RECV_LEN) { + dev->status |= STATUS_WRITE_IN_PROGRESS; + intr_mask &= ~DW_IC_INTR_TX_EMPTY; + break; + } else if (buf_len > 0) { /* more bytes to be written */ dev->status |= STATUS_WRITE_IN_PROGRESS; break; @@ -558,6 +564,13 @@ i2c_dw_recv_len(struct dw_i2c_dev *dev, u8 len) msgs[dev->msg_read_idx].len = len; msgs[dev->msg_read_idx].flags &= ~I2C_M_RECV_LEN; + /* + * Received buffer length, re-enable TX_EMPTY interrupt + * to resume the SMBUS transaction. + */ + regmap_update_bits(dev->map, DW_IC_INTR_MASK, DW_IC_INTR_TX_EMPTY, + DW_IC_INTR_TX_EMPTY); + return len; } diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index a91201509bc1..8b7e599f1674 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -450,8 +450,8 @@ static long i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (rdwr_arg.nmsgs > I2C_RDWR_IOCTL_MAX_MSGS) return -EINVAL; - rdwr_pa = memdup_user(rdwr_arg.msgs, - rdwr_arg.nmsgs * sizeof(struct i2c_msg)); + rdwr_pa = memdup_array_user(rdwr_arg.msgs, + rdwr_arg.nmsgs, sizeof(struct i2c_msg)); if (IS_ERR(rdwr_pa)) return PTR_ERR(rdwr_pa); diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index 95f90699d2b1..51e0c4954600 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -50,7 +50,7 @@ struct evdev_client { bool revoked; unsigned long *evmasks[EV_CNT]; unsigned int bufsize; - struct input_event buffer[]; + struct input_event buffer[] __counted_by(bufsize); }; static size_t evdev_get_mask_cnt(unsigned int type) diff --git a/drivers/input/input-leds.c b/drivers/input/input-leds.c index 0b11990ade46..0e935914bc3a 100644 --- a/drivers/input/input-leds.c +++ b/drivers/input/input-leds.c @@ -44,7 +44,7 @@ struct input_led { struct input_leds { struct input_handle handle; unsigned int num_leds; - struct input_led leds[]; + struct input_led leds[] __counted_by(num_leds); }; static enum led_brightness input_leds_brightness_get(struct led_classdev *cdev) diff --git a/drivers/input/joystick/walkera0701.c b/drivers/input/joystick/walkera0701.c index 56abc8c6c763..27d95d6cf56e 100644 --- a/drivers/input/joystick/walkera0701.c +++ b/drivers/input/joystick/walkera0701.c @@ -296,15 +296,4 @@ static struct parport_driver walkera0701_parport_driver = { .devmodel = true, }; -static int __init walkera0701_init(void) -{ - return parport_register_driver(&walkera0701_parport_driver); -} - -static void __exit walkera0701_exit(void) -{ - parport_unregister_driver(&walkera0701_parport_driver); -} - -module_init(walkera0701_init); -module_exit(walkera0701_exit); +module_parport_driver(walkera0701_parport_driver); diff --git a/drivers/input/keyboard/adp5520-keys.c b/drivers/input/keyboard/adp5520-keys.c index 7851ffd678a8..10c248f0c1fc 100644 --- a/drivers/input/keyboard/adp5520-keys.c +++ b/drivers/input/keyboard/adp5520-keys.c @@ -168,14 +168,12 @@ static int adp5520_keys_probe(struct platform_device *pdev) return 0; } -static int adp5520_keys_remove(struct platform_device *pdev) +static void adp5520_keys_remove(struct platform_device *pdev) { struct adp5520_keys *dev = platform_get_drvdata(pdev); adp5520_unregister_notifier(dev->master, &dev->notifier, ADP5520_KP_IEN | ADP5520_KR_IEN); - - return 0; } static struct platform_driver adp5520_keys_driver = { @@ -183,7 +181,7 @@ static struct platform_driver adp5520_keys_driver = { .name = "adp5520-keys", }, 
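The long run of input-driver hunks through the rest of this section is one tree-wide conversion: a platform driver's .remove() callback used to return int, but the driver core discards that value apart from emitting a warning, so a non-zero return could only leak resources silently. The void-returning .remove_new() callback makes the cannot-fail contract explicit. Every hunk has the same shape, sketched here with placeholder names (foo_*, not taken from any of the patched drivers):

        #include <linux/platform_device.h>

        struct foo_priv { int hw_handle; };

        static void foo_teardown(struct foo_priv *priv) { /* release resources */ }

        static int foo_probe(struct platform_device *pdev) { return 0; }

        /* Was "static int foo_remove(...)" ending in "return 0;"; the returned
         * value was discarded by the platform bus core anyway. */
        static void foo_remove(struct platform_device *pdev)
        {
                struct foo_priv *priv = platform_get_drvdata(pdev);

                foo_teardown(priv);     /* all cleanup happens here, no error path */
        }

        static struct platform_driver foo_driver = {
                .probe          = foo_probe,
                .remove_new     = foo_remove,
                .driver         = {
                        .name   = "foo",
                },
        };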
.probe = adp5520_keys_probe, - .remove = adp5520_keys_remove, + .remove_new = adp5520_keys_remove, }; module_platform_driver(adp5520_keys_driver); diff --git a/drivers/input/keyboard/cros_ec_keyb.c b/drivers/input/keyboard/cros_ec_keyb.c index e7ecfca838df..30678a34cf64 100644 --- a/drivers/input/keyboard/cros_ec_keyb.c +++ b/drivers/input/keyboard/cros_ec_keyb.c @@ -686,10 +686,11 @@ static umode_t cros_ec_keyb_attr_is_visible(struct kobject *kobj, return attr->mode; } -static const struct attribute_group cros_ec_keyb_attr_group = { +static const struct attribute_group cros_ec_keyb_group = { .is_visible = cros_ec_keyb_attr_is_visible, .attrs = cros_ec_keyb_attrs, }; +__ATTRIBUTE_GROUPS(cros_ec_keyb); static int cros_ec_keyb_probe(struct platform_device *pdev) { @@ -730,12 +731,6 @@ static int cros_ec_keyb_probe(struct platform_device *pdev) return err; } - err = devm_device_add_group(dev, &cros_ec_keyb_attr_group); - if (err) { - dev_err(dev, "failed to create attributes: %d\n", err); - return err; - } - ckdev->notifier.notifier_call = cros_ec_keyb_work; err = blocking_notifier_chain_register(&ckdev->ec->event_notifier, &ckdev->notifier); @@ -748,14 +743,12 @@ static int cros_ec_keyb_probe(struct platform_device *pdev) return 0; } -static int cros_ec_keyb_remove(struct platform_device *pdev) +static void cros_ec_keyb_remove(struct platform_device *pdev) { struct cros_ec_keyb *ckdev = dev_get_drvdata(&pdev->dev); blocking_notifier_chain_unregister(&ckdev->ec->event_notifier, &ckdev->notifier); - - return 0; } #ifdef CONFIG_ACPI @@ -779,9 +772,10 @@ static DEFINE_SIMPLE_DEV_PM_OPS(cros_ec_keyb_pm_ops, NULL, cros_ec_keyb_resume); static struct platform_driver cros_ec_keyb_driver = { .probe = cros_ec_keyb_probe, - .remove = cros_ec_keyb_remove, + .remove_new = cros_ec_keyb_remove, .driver = { .name = "cros-ec-keyb", + .dev_groups = cros_ec_keyb_groups, .of_match_table = of_match_ptr(cros_ec_keyb_of_match), .acpi_match_table = ACPI_PTR(cros_ec_keyb_acpi_match), .pm = pm_sleep_ptr(&cros_ec_keyb_pm_ops), diff --git a/drivers/input/keyboard/ep93xx_keypad.c b/drivers/input/keyboard/ep93xx_keypad.c index 55075addcac2..6b811d6bf625 100644 --- a/drivers/input/keyboard/ep93xx_keypad.c +++ b/drivers/input/keyboard/ep93xx_keypad.c @@ -308,11 +308,9 @@ static int ep93xx_keypad_probe(struct platform_device *pdev) return 0; } -static int ep93xx_keypad_remove(struct platform_device *pdev) +static void ep93xx_keypad_remove(struct platform_device *pdev) { dev_pm_clear_wake_irq(&pdev->dev); - - return 0; } static struct platform_driver ep93xx_keypad_driver = { @@ -321,7 +319,7 @@ static struct platform_driver ep93xx_keypad_driver = { .pm = pm_sleep_ptr(&ep93xx_keypad_pm_ops), }, .probe = ep93xx_keypad_probe, - .remove = ep93xx_keypad_remove, + .remove_new = ep93xx_keypad_remove, }; module_platform_driver(ep93xx_keypad_driver); diff --git a/drivers/input/keyboard/iqs62x-keys.c b/drivers/input/keyboard/iqs62x-keys.c index 02ceebad7bda..688d61244b5f 100644 --- a/drivers/input/keyboard/iqs62x-keys.c +++ b/drivers/input/keyboard/iqs62x-keys.c @@ -310,7 +310,7 @@ static int iqs62x_keys_probe(struct platform_device *pdev) return ret; } -static int iqs62x_keys_remove(struct platform_device *pdev) +static void iqs62x_keys_remove(struct platform_device *pdev) { struct iqs62x_keys_private *iqs62x_keys = platform_get_drvdata(pdev); int ret; @@ -319,8 +319,6 @@ static int iqs62x_keys_remove(struct platform_device *pdev) &iqs62x_keys->notifier); if (ret) dev_err(&pdev->dev, "Failed to unregister notifier: %d\n", ret); - - 
return 0; } static struct platform_driver iqs62x_keys_platform_driver = { @@ -328,7 +326,7 @@ static struct platform_driver iqs62x_keys_platform_driver = { .name = "iqs62x-keys", }, .probe = iqs62x_keys_probe, - .remove = iqs62x_keys_remove, + .remove_new = iqs62x_keys_remove, }; module_platform_driver(iqs62x_keys_platform_driver); diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c index a1b037891af2..50fa764c82d2 100644 --- a/drivers/input/keyboard/matrix_keypad.c +++ b/drivers/input/keyboard/matrix_keypad.c @@ -549,15 +549,13 @@ err_free_mem: return err; } -static int matrix_keypad_remove(struct platform_device *pdev) +static void matrix_keypad_remove(struct platform_device *pdev) { struct matrix_keypad *keypad = platform_get_drvdata(pdev); matrix_keypad_free_gpio(keypad); input_unregister_device(keypad->input_dev); kfree(keypad); - - return 0; } #ifdef CONFIG_OF @@ -570,7 +568,7 @@ MODULE_DEVICE_TABLE(of, matrix_keypad_dt_match); static struct platform_driver matrix_keypad_driver = { .probe = matrix_keypad_probe, - .remove = matrix_keypad_remove, + .remove_new = matrix_keypad_remove, .driver = { .name = "matrix-keypad", .pm = pm_sleep_ptr(&matrix_keypad_pm_ops), diff --git a/drivers/input/keyboard/omap-keypad.c b/drivers/input/keyboard/omap-keypad.c index 24440b498645..454fb8675657 100644 --- a/drivers/input/keyboard/omap-keypad.c +++ b/drivers/input/keyboard/omap-keypad.c @@ -287,7 +287,7 @@ err2: return -EINVAL; } -static int omap_kp_remove(struct platform_device *pdev) +static void omap_kp_remove(struct platform_device *pdev) { struct omap_kp *omap_kp = platform_get_drvdata(pdev); @@ -303,13 +303,11 @@ static int omap_kp_remove(struct platform_device *pdev) input_unregister_device(omap_kp->input); kfree(omap_kp); - - return 0; } static struct platform_driver omap_kp_driver = { .probe = omap_kp_probe, - .remove = omap_kp_remove, + .remove_new = omap_kp_remove, .driver = { .name = "omap-keypad", }, diff --git a/drivers/input/keyboard/omap4-keypad.c b/drivers/input/keyboard/omap4-keypad.c index 773e55eed88b..d3f8688fdd9c 100644 --- a/drivers/input/keyboard/omap4-keypad.c +++ b/drivers/input/keyboard/omap4-keypad.c @@ -461,11 +461,9 @@ static int omap4_keypad_probe(struct platform_device *pdev) return 0; } -static int omap4_keypad_remove(struct platform_device *pdev) +static void omap4_keypad_remove(struct platform_device *pdev) { dev_pm_clear_wake_irq(&pdev->dev); - - return 0; } static const struct of_device_id omap_keypad_dt_match[] = { @@ -476,7 +474,7 @@ MODULE_DEVICE_TABLE(of, omap_keypad_dt_match); static struct platform_driver omap4_keypad_driver = { .probe = omap4_keypad_probe, - .remove = omap4_keypad_remove, + .remove_new = omap4_keypad_remove, .driver = { .name = "omap4-keypad", .of_match_table = omap_keypad_dt_match, diff --git a/drivers/input/keyboard/samsung-keypad.c b/drivers/input/keyboard/samsung-keypad.c index d85dd2489293..e212eff7687c 100644 --- a/drivers/input/keyboard/samsung-keypad.c +++ b/drivers/input/keyboard/samsung-keypad.c @@ -444,7 +444,7 @@ err_unprepare_clk: return error; } -static int samsung_keypad_remove(struct platform_device *pdev) +static void samsung_keypad_remove(struct platform_device *pdev) { struct samsung_keypad *keypad = platform_get_drvdata(pdev); @@ -453,8 +453,6 @@ static int samsung_keypad_remove(struct platform_device *pdev) input_unregister_device(keypad->input_dev); clk_unprepare(keypad->clk); - - return 0; } static int samsung_keypad_runtime_suspend(struct device *dev) @@ -589,7 +587,7 @@ 
MODULE_DEVICE_TABLE(platform, samsung_keypad_driver_ids); static struct platform_driver samsung_keypad_driver = { .probe = samsung_keypad_probe, - .remove = samsung_keypad_remove, + .remove_new = samsung_keypad_remove, .driver = { .name = "samsung-keypad", .of_match_table = of_match_ptr(samsung_keypad_dt_match), diff --git a/drivers/input/keyboard/sh_keysc.c b/drivers/input/keyboard/sh_keysc.c index 2c00320f739f..4ea4fd25c5d2 100644 --- a/drivers/input/keyboard/sh_keysc.c +++ b/drivers/input/keyboard/sh_keysc.c @@ -265,7 +265,7 @@ static int sh_keysc_probe(struct platform_device *pdev) return error; } -static int sh_keysc_remove(struct platform_device *pdev) +static void sh_keysc_remove(struct platform_device *pdev) { struct sh_keysc_priv *priv = platform_get_drvdata(pdev); @@ -279,8 +279,6 @@ static int sh_keysc_remove(struct platform_device *pdev) pm_runtime_disable(&pdev->dev); kfree(priv); - - return 0; } static int sh_keysc_suspend(struct device *dev) @@ -321,7 +319,7 @@ static DEFINE_SIMPLE_DEV_PM_OPS(sh_keysc_dev_pm_ops, static struct platform_driver sh_keysc_device_driver = { .probe = sh_keysc_probe, - .remove = sh_keysc_remove, + .remove_new = sh_keysc_remove, .driver = { .name = "sh_keysc", .pm = pm_sleep_ptr(&sh_keysc_dev_pm_ops), diff --git a/drivers/input/keyboard/spear-keyboard.c b/drivers/input/keyboard/spear-keyboard.c index a50fa9915381..557d00a667ce 100644 --- a/drivers/input/keyboard/spear-keyboard.c +++ b/drivers/input/keyboard/spear-keyboard.c @@ -272,14 +272,12 @@ static int spear_kbd_probe(struct platform_device *pdev) return 0; } -static int spear_kbd_remove(struct platform_device *pdev) +static void spear_kbd_remove(struct platform_device *pdev) { struct spear_kbd *kbd = platform_get_drvdata(pdev); input_unregister_device(kbd->input); clk_unprepare(kbd->clk); - - return 0; } static int spear_kbd_suspend(struct device *dev) @@ -375,7 +373,7 @@ MODULE_DEVICE_TABLE(of, spear_kbd_id_table); static struct platform_driver spear_kbd_driver = { .probe = spear_kbd_probe, - .remove = spear_kbd_remove, + .remove_new = spear_kbd_remove, .driver = { .name = "keyboard", .pm = pm_sleep_ptr(&spear_kbd_pm_ops), diff --git a/drivers/input/keyboard/stmpe-keypad.c b/drivers/input/keyboard/stmpe-keypad.c index 2c6c53290cc0..2013c0afd0c3 100644 --- a/drivers/input/keyboard/stmpe-keypad.c +++ b/drivers/input/keyboard/stmpe-keypad.c @@ -404,20 +404,18 @@ static int stmpe_keypad_probe(struct platform_device *pdev) return 0; } -static int stmpe_keypad_remove(struct platform_device *pdev) +static void stmpe_keypad_remove(struct platform_device *pdev) { struct stmpe_keypad *keypad = platform_get_drvdata(pdev); stmpe_disable(keypad->stmpe, STMPE_BLOCK_KEYPAD); - - return 0; } static struct platform_driver stmpe_keypad_driver = { .driver.name = "stmpe-keypad", .driver.owner = THIS_MODULE, .probe = stmpe_keypad_probe, - .remove = stmpe_keypad_remove, + .remove_new = stmpe_keypad_remove, }; module_platform_driver(stmpe_keypad_driver); diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c index c9a823ea45d0..a1765ed8c825 100644 --- a/drivers/input/keyboard/tegra-kbc.c +++ b/drivers/input/keyboard/tegra-kbc.c @@ -14,7 +14,7 @@ #include <linux/io.h> #include <linux/interrupt.h> #include <linux/of.h> -#include <linux/of_device.h> +#include <linux/property.h> #include <linux/clk.h> #include <linux/slab.h> #include <linux/input/matrix_keypad.h> @@ -602,9 +602,6 @@ static int tegra_kbc_probe(struct platform_device *pdev) unsigned int debounce_cnt; unsigned int 
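Several hunks in this input series (cros_ec_keyb earlier, iqs269a and kxtj9 below) make the same sysfs change: instead of calling devm_device_add_group() from probe(), after user space may already have reacted to the device, the attribute group is declared statically and handed to the core through the driver's .dev_groups pointer, so the core creates and removes the files at bind time. Attributes that only apply in some configurations move into an .is_visible() callback, as kxtj9 does for its irq-dependent poll attribute. A sketch of the pattern, with my_* as placeholder names:

        #include <linux/device.h>
        #include <linux/platform_device.h>
        #include <linux/sysfs.h>

        static ssize_t poll_show(struct device *dev, struct device_attribute *attr,
                                 char *buf)
        {
                return sysfs_emit(buf, "%d\n", 0);      /* placeholder value */
        }
        static DEVICE_ATTR_RO(poll);

        static struct attribute *my_attrs[] = {
                &dev_attr_poll.attr,
                NULL
        };
        ATTRIBUTE_GROUPS(my);   /* generates my_group and the my_groups[] list */

        static int my_probe(struct platform_device *pdev)
        {
                return 0;       /* no devm_device_add_group() needed here */
        }

        static struct platform_driver my_driver = {
                .driver = {
                        .name           = "my-dev",
                        .dev_groups     = my_groups,
                },
                .probe = my_probe,
        };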
scan_time_rows; unsigned int keymap_rows; - const struct of_device_id *match; - - match = of_match_device(tegra_kbc_of_match, &pdev->dev); kbc = devm_kzalloc(&pdev->dev, sizeof(*kbc), GFP_KERNEL); if (!kbc) { @@ -613,7 +610,7 @@ static int tegra_kbc_probe(struct platform_device *pdev) } kbc->dev = &pdev->dev; - kbc->hw_support = match->data; + kbc->hw_support = device_get_match_data(&pdev->dev); kbc->max_keys = kbc->hw_support->max_rows * kbc->hw_support->max_columns; kbc->num_rows_and_columns = kbc->hw_support->max_rows + diff --git a/drivers/input/misc/88pm80x_onkey.c b/drivers/input/misc/88pm80x_onkey.c index 51c8a326fd06..31f0702c3d01 100644 --- a/drivers/input/misc/88pm80x_onkey.c +++ b/drivers/input/misc/88pm80x_onkey.c @@ -138,14 +138,13 @@ out: return err; } -static int pm80x_onkey_remove(struct platform_device *pdev) +static void pm80x_onkey_remove(struct platform_device *pdev) { struct pm80x_onkey_info *info = platform_get_drvdata(pdev); pm80x_free_irq(info->pm80x, info->irq, info); input_unregister_device(info->idev); kfree(info); - return 0; } static struct platform_driver pm80x_onkey_driver = { @@ -154,7 +153,7 @@ static struct platform_driver pm80x_onkey_driver = { .pm = &pm80x_onkey_pm_ops, }, .probe = pm80x_onkey_probe, - .remove = pm80x_onkey_remove, + .remove_new = pm80x_onkey_remove, }; module_platform_driver(pm80x_onkey_driver); diff --git a/drivers/input/misc/axp20x-pek.c b/drivers/input/misc/axp20x-pek.c index 4581606a28d6..24f9e9d893de 100644 --- a/drivers/input/misc/axp20x-pek.c +++ b/drivers/input/misc/axp20x-pek.c @@ -133,20 +133,11 @@ static ssize_t axp20x_store_attr(struct device *dev, size_t count) { struct axp20x_pek *axp20x_pek = dev_get_drvdata(dev); - char val_str[20]; - size_t len; int ret, i; unsigned int val, idx = 0; unsigned int best_err = UINT_MAX; - val_str[sizeof(val_str) - 1] = '\0'; - strncpy(val_str, buf, sizeof(val_str) - 1); - len = strlen(val_str); - - if (len && val_str[len - 1] == '\n') - val_str[len - 1] = '\0'; - - ret = kstrtouint(val_str, 10, &val); + ret = kstrtouint(buf, 10, &val); if (ret) return ret; diff --git a/drivers/input/misc/da9052_onkey.c b/drivers/input/misc/da9052_onkey.c index 6d1152850a6d..7a1122e1efb9 100644 --- a/drivers/input/misc/da9052_onkey.c +++ b/drivers/input/misc/da9052_onkey.c @@ -127,7 +127,7 @@ err_free_mem: return error; } -static int da9052_onkey_remove(struct platform_device *pdev) +static void da9052_onkey_remove(struct platform_device *pdev) { struct da9052_onkey *onkey = platform_get_drvdata(pdev); @@ -136,13 +136,11 @@ static int da9052_onkey_remove(struct platform_device *pdev) input_unregister_device(onkey->input); kfree(onkey); - - return 0; } static struct platform_driver da9052_onkey_driver = { .probe = da9052_onkey_probe, - .remove = da9052_onkey_remove, + .remove_new = da9052_onkey_remove, .driver = { .name = "da9052-onkey", }, diff --git a/drivers/input/misc/da9055_onkey.c b/drivers/input/misc/da9055_onkey.c index 7a0d3a1d503c..871812f1b398 100644 --- a/drivers/input/misc/da9055_onkey.c +++ b/drivers/input/misc/da9055_onkey.c @@ -132,7 +132,7 @@ err_free_input: return err; } -static int da9055_onkey_remove(struct platform_device *pdev) +static void da9055_onkey_remove(struct platform_device *pdev) { struct da9055_onkey *onkey = platform_get_drvdata(pdev); int irq = platform_get_irq_byname(pdev, "ONKEY"); @@ -141,13 +141,11 @@ static int da9055_onkey_remove(struct platform_device *pdev) free_irq(irq, onkey); cancel_delayed_work_sync(&onkey->work); input_unregister_device(onkey->input); - - 
diff --git a/drivers/input/misc/da9055_onkey.c b/drivers/input/misc/da9055_onkey.c
index 7a0d3a1d503c..871812f1b398 100644
--- a/drivers/input/misc/da9055_onkey.c
+++ b/drivers/input/misc/da9055_onkey.c
@@ -132,7 +132,7 @@ err_free_input:
 	return err;
 }
 
-static int da9055_onkey_remove(struct platform_device *pdev)
+static void da9055_onkey_remove(struct platform_device *pdev)
 {
 	struct da9055_onkey *onkey = platform_get_drvdata(pdev);
 	int irq = platform_get_irq_byname(pdev, "ONKEY");
@@ -141,13 +141,11 @@ static int da9055_onkey_remove(struct platform_device *pdev)
 	free_irq(irq, onkey);
 	cancel_delayed_work_sync(&onkey->work);
 	input_unregister_device(onkey->input);
-
-	return 0;
 }
 
 static struct platform_driver da9055_onkey_driver = {
 	.probe = da9055_onkey_probe,
-	.remove = da9055_onkey_remove,
+	.remove_new = da9055_onkey_remove,
 	.driver = {
 		.name = "da9055-onkey",
 	},
diff --git a/drivers/input/misc/ideapad_slidebar.c b/drivers/input/misc/ideapad_slidebar.c
index 68f1c584da05..fa4e7f67d713 100644
--- a/drivers/input/misc/ideapad_slidebar.c
+++ b/drivers/input/misc/ideapad_slidebar.c
@@ -256,20 +256,18 @@ err_release_ports:
 	return err;
 }
 
-static int ideapad_remove(struct platform_device *pdev)
+static void ideapad_remove(struct platform_device *pdev)
 {
 	i8042_remove_filter(slidebar_i8042_filter);
 	input_unregister_device(slidebar_input_dev);
 	release_region(IDEAPAD_BASE, 3);
-
-	return 0;
 }
 
 static struct platform_driver slidebar_drv = {
 	.driver = {
 		.name = "ideapad_slidebar",
 	},
-	.remove = ideapad_remove,
+	.remove_new = ideapad_remove,
 };
 
 static int __init ideapad_dmi_check(const struct dmi_system_id *id)
diff --git a/drivers/input/misc/iqs269a.c b/drivers/input/misc/iqs269a.c
index c0a085639870..3c636c75e8a1 100644
--- a/drivers/input/misc/iqs269a.c
+++ b/drivers/input/misc/iqs269a.c
@@ -1586,10 +1586,7 @@ static struct attribute *iqs269_attrs[] = {
 	&dev_attr_ati_trigger.attr,
 	NULL,
 };
-
-static const struct attribute_group iqs269_attr_group = {
-	.attrs = iqs269_attrs,
-};
+ATTRIBUTE_GROUPS(iqs269);
 
 static const struct regmap_config iqs269_regmap_config = {
 	.reg_bits = 8,
@@ -1671,10 +1668,6 @@ static int iqs269_probe(struct i2c_client *client)
 		return error;
 	}
 
-	error = devm_device_add_group(&client->dev, &iqs269_attr_group);
-	if (error)
-		dev_err(&client->dev, "Failed to add attributes: %d\n", error);
-
 	return error;
 }
 
@@ -1743,6 +1736,7 @@ MODULE_DEVICE_TABLE(of, iqs269_of_match);
 static struct i2c_driver iqs269_i2c_driver = {
 	.driver = {
 		.name = "iqs269a",
+		.dev_groups = iqs269_groups,
 		.of_match_table = iqs269_of_match,
 		.pm = pm_sleep_ptr(&iqs269_pm),
 	},
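The iqs269a hunk above, and many of the hunks that follow, share a second pattern: instead of calling devm_device_add_group() from probe(), the driver hands its sysfs attributes to the driver core through struct device_driver.dev_groups, and the core creates and removes the files at bind/unbind time. ATTRIBUTE_GROUPS(foo) is the stock macro that expands a foo_attrs[] array into foo_group and the NULL-terminated foo_groups[]. A sketch with a hypothetical "foo" i2c driver:

static struct attribute *foo_attrs[] = {
	&dev_attr_version.attr,
	NULL
};
ATTRIBUTE_GROUPS(foo);	/* defines foo_group and foo_groups[] */

static struct i2c_driver foo_driver = {
	.driver = {
		.name = "foo",
		.dev_groups = foo_groups,	/* core adds/removes the files */
	},
	.probe = foo_probe,
};

Drivers that need an is_visible() callback (kxtj9, ad7877, ili210x, iqs5xx below) define the group by hand and use __ATTRIBUTE_GROUPS() to generate only the array.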
diff --git a/drivers/input/misc/kxtj9.c b/drivers/input/misc/kxtj9.c
index 912e614d039d..d47269b10e9a 100644
--- a/drivers/input/misc/kxtj9.c
+++ b/drivers/input/misc/kxtj9.c
@@ -334,14 +334,25 @@ static ssize_t kxtj9_set_poll(struct device *dev,
 				struct device_attribute *attr,
 
 static DEVICE_ATTR(poll, S_IRUGO|S_IWUSR, kxtj9_get_poll, kxtj9_set_poll);
 
-static struct attribute *kxtj9_attributes[] = {
+static struct attribute *kxtj9_attrs[] = {
 	&dev_attr_poll.attr,
 	NULL
 };
 
-static struct attribute_group kxtj9_attribute_group = {
-	.attrs = kxtj9_attributes
+static umode_t kxtj9_attr_is_visible(struct kobject *kobj,
+				     struct attribute *attr, int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct i2c_client *client = to_i2c_client(dev);
+
+	return client->irq ? attr->mode : 0;
+}
+
+static struct attribute_group kxtj9_group = {
+	.attrs = kxtj9_attrs,
+	.is_visible = kxtj9_attr_is_visible,
 };
+__ATTRIBUTE_GROUPS(kxtj9);
 
 static void kxtj9_poll(struct input_dev *input)
 {
@@ -482,13 +493,6 @@ static int kxtj9_probe(struct i2c_client *client)
 			dev_err(&client->dev, "request irq failed: %d\n", err);
 			return err;
 		}
-
-		err = devm_device_add_group(&client->dev,
-					    &kxtj9_attribute_group);
-		if (err) {
-			dev_err(&client->dev, "sysfs create failed: %d\n", err);
-			return err;
-		}
 	}
 
 	return 0;
@@ -535,8 +539,9 @@ MODULE_DEVICE_TABLE(i2c, kxtj9_id);
 
 static struct i2c_driver kxtj9_driver = {
 	.driver = {
-		.name = NAME,
-		.pm = pm_sleep_ptr(&kxtj9_pm_ops),
+		.name = NAME,
+		.dev_groups = kxtj9_groups,
+		.pm = pm_sleep_ptr(&kxtj9_pm_ops),
 	},
 	.probe = kxtj9_probe,
 	.id_table = kxtj9_id,
diff --git a/drivers/input/misc/m68kspkr.c b/drivers/input/misc/m68kspkr.c
index 25fcf1467151..3fe0a85c45e0 100644
--- a/drivers/input/misc/m68kspkr.c
+++ b/drivers/input/misc/m68kspkr.c
@@ -75,15 +75,13 @@ static int m68kspkr_probe(struct platform_device *dev)
 	return 0;
 }
 
-static int m68kspkr_remove(struct platform_device *dev)
+static void m68kspkr_remove(struct platform_device *dev)
 {
 	struct input_dev *input_dev = platform_get_drvdata(dev);
 
 	input_unregister_device(input_dev);
 	/* turn off the speaker */
 	m68kspkr_event(NULL, EV_SND, SND_BELL, 0);
-
-	return 0;
 }
 
 static void m68kspkr_shutdown(struct platform_device *dev)
@@ -97,7 +95,7 @@ static struct platform_driver m68kspkr_platform_driver = {
 		.name = "m68kspkr",
 	},
 	.probe = m68kspkr_probe,
-	.remove = m68kspkr_remove,
+	.remove_new = m68kspkr_remove,
 	.shutdown = m68kspkr_shutdown,
 };
diff --git a/drivers/input/misc/max8997_haptic.c b/drivers/input/misc/max8997_haptic.c
index c4dff476d479..8861a67be575 100644
--- a/drivers/input/misc/max8997_haptic.c
+++ b/drivers/input/misc/max8997_haptic.c
@@ -351,7 +351,7 @@ err_free_mem:
 	return error;
 }
 
-static int max8997_haptic_remove(struct platform_device *pdev)
+static void max8997_haptic_remove(struct platform_device *pdev)
 {
 	struct max8997_haptic *chip = platform_get_drvdata(pdev);
 
@@ -362,8 +362,6 @@ static int max8997_haptic_remove(struct platform_device *pdev)
 	pwm_put(chip->pwm);
 
 	kfree(chip);
-
-	return 0;
 }
 
 static int max8997_haptic_suspend(struct device *dev)
@@ -391,7 +389,7 @@ static struct platform_driver max8997_haptic_driver = {
 		.pm = pm_sleep_ptr(&max8997_haptic_pm_ops),
 	},
 	.probe = max8997_haptic_probe,
-	.remove = max8997_haptic_remove,
+	.remove_new = max8997_haptic_remove,
 	.id_table = max8997_haptic_id,
 };
 
 module_platform_driver(max8997_haptic_driver);
diff --git a/drivers/input/misc/mc13783-pwrbutton.c b/drivers/input/misc/mc13783-pwrbutton.c
index 0636eee4bb6c..1c8c939638f6 100644
--- a/drivers/input/misc/mc13783-pwrbutton.c
+++ b/drivers/input/misc/mc13783-pwrbutton.c
@@ -229,7 +229,7 @@ free_input_dev:
 	return err;
 }
 
-static int mc13783_pwrbutton_remove(struct platform_device *pdev)
+static void mc13783_pwrbutton_remove(struct platform_device *pdev)
 {
 	struct mc13783_pwrb *priv = platform_get_drvdata(pdev);
 	const struct mc13xxx_buttons_platform_data *pdata;
@@ -249,13 +249,11 @@ static int mc13783_pwrbutton_remove(struct platform_device *pdev)
 
 	input_unregister_device(priv->pwr);
 	kfree(priv);
-
-	return 0;
 }
 
 static struct platform_driver mc13783_pwrbutton_driver = {
 	.probe = mc13783_pwrbutton_probe,
-	.remove = mc13783_pwrbutton_remove,
+	.remove_new = mc13783_pwrbutton_remove,
 	.driver = {
 		.name = "mc13783-pwrbutton",
 	},
diff --git a/drivers/input/misc/palmas-pwrbutton.c b/drivers/input/misc/palmas-pwrbutton.c
index 7e361727b0d9..06d5972e8e84 100644
--- a/drivers/input/misc/palmas-pwrbutton.c
+++ b/drivers/input/misc/palmas-pwrbutton.c
@@ -245,7 +245,7 @@ err_free_mem:
 *
 * Return: 0
 */
-static int palmas_pwron_remove(struct platform_device *pdev)
+static void palmas_pwron_remove(struct platform_device *pdev)
 {
 	struct palmas_pwron *pwron = platform_get_drvdata(pdev);
 
@@ -254,8 +254,6 @@ static int palmas_pwron_remove(struct platform_device *pdev)
 	input_unregister_device(pwron->input_dev);
 	kfree(pwron);
-
-	return 0;
 }
 
 /**
@@ -312,7 +310,7 @@ MODULE_DEVICE_TABLE(of, of_palmas_pwr_match);
 
 static struct platform_driver palmas_pwron_driver = {
 	.probe = palmas_pwron_probe,
-	.remove = palmas_pwron_remove,
+	.remove_new = palmas_pwron_remove,
 	.driver = {
 		.name = "palmas_pwrbutton",
 		.of_match_table = of_match_ptr(of_palmas_pwr_match),
diff --git a/drivers/input/misc/pcap_keys.c b/drivers/input/misc/pcap_keys.c
index b5a53636d7e2..8a7e9ada5952 100644
--- a/drivers/input/misc/pcap_keys.c
+++ b/drivers/input/misc/pcap_keys.c
@@ -99,7 +99,7 @@ fail:
 	return err;
 }
 
-static int pcap_keys_remove(struct platform_device *pdev)
+static void pcap_keys_remove(struct platform_device *pdev)
 {
 	struct pcap_keys *pcap_keys = platform_get_drvdata(pdev);
 
@@ -108,13 +108,11 @@ static int pcap_keys_remove(struct platform_device *pdev)
 	input_unregister_device(pcap_keys->input);
 	kfree(pcap_keys);
-
-	return 0;
 }
 
 static struct platform_driver pcap_keys_device_driver = {
 	.probe = pcap_keys_probe,
-	.remove = pcap_keys_remove,
+	.remove_new = pcap_keys_remove,
 	.driver = {
 		.name = "pcap-keys",
 	}
diff --git a/drivers/input/misc/pcf50633-input.c b/drivers/input/misc/pcf50633-input.c
index 4c60c70c4c10..c5c5fe236c18 100644
--- a/drivers/input/misc/pcf50633-input.c
+++ b/drivers/input/misc/pcf50633-input.c
@@ -87,7 +87,7 @@ static int pcf50633_input_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int pcf50633_input_remove(struct platform_device *pdev)
+static void pcf50633_input_remove(struct platform_device *pdev)
 {
 	struct pcf50633_input *input = platform_get_drvdata(pdev);
 
@@ -96,8 +96,6 @@ static int pcf50633_input_remove(struct platform_device *pdev)
 	input_unregister_device(input->input_dev);
 	kfree(input);
-
-	return 0;
 }
 
 static struct platform_driver pcf50633_input_driver = {
@@ -105,7 +103,7 @@ static struct platform_driver pcf50633_input_driver = {
 		.name = "pcf50633-input",
 	},
 	.probe = pcf50633_input_probe,
-	.remove = pcf50633_input_remove,
+	.remove_new = pcf50633_input_remove,
 };
 
 module_platform_driver(pcf50633_input_driver);
diff --git a/drivers/input/misc/pcspkr.c b/drivers/input/misc/pcspkr.c
index 9c666b2f14fe..897854fd245f 100644
--- a/drivers/input/misc/pcspkr.c
+++ b/drivers/input/misc/pcspkr.c
@@ -95,15 +95,13 @@ static int pcspkr_probe(struct platform_device *dev)
 	return 0;
 }
 
-static int pcspkr_remove(struct platform_device *dev)
+static void pcspkr_remove(struct platform_device *dev)
 {
 	struct input_dev *pcspkr_dev = platform_get_drvdata(dev);
 
 	input_unregister_device(pcspkr_dev);
 	/* turn off the speaker */
 	pcspkr_event(NULL, EV_SND, SND_BELL, 0);
-
-	return 0;
 }
 
 static int pcspkr_suspend(struct device *dev)
@@ -129,7 +127,7 @@ static struct platform_driver pcspkr_platform_driver = {
 		.pm = &pcspkr_pm_ops,
 	},
 	.probe = pcspkr_probe,
-	.remove = pcspkr_remove,
+	.remove_new = pcspkr_remove,
 	.shutdown = pcspkr_shutdown,
 };
 
 module_platform_driver(pcspkr_platform_driver);
diff --git a/drivers/input/misc/pm8941-pwrkey.c b/drivers/input/misc/pm8941-pwrkey.c
index ba747c5b2b5f..bab710023d8f 100644
--- a/drivers/input/misc/pm8941-pwrkey.c
+++ b/drivers/input/misc/pm8941-pwrkey.c
@@ -408,14 +408,12 @@ static int pm8941_pwrkey_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int pm8941_pwrkey_remove(struct platform_device *pdev)
+static void pm8941_pwrkey_remove(struct platform_device *pdev)
 {
 	struct pm8941_pwrkey *pwrkey = platform_get_drvdata(pdev);
 
 	if (pwrkey->data->supports_ps_hold_poff_config)
 		unregister_reboot_notifier(&pwrkey->reboot_notifier);
-
-	return 0;
 }
 
 static const struct pm8941_data pwrkey_data = {
@@ -467,7 +465,7 @@ MODULE_DEVICE_TABLE(of, pm8941_pwr_key_id_table);
 
 static struct platform_driver pm8941_pwrkey_driver = {
 	.probe = pm8941_pwrkey_probe,
-	.remove = pm8941_pwrkey_remove,
+	.remove_new = pm8941_pwrkey_remove,
 	.driver = {
 		.name = "pm8941-pwrkey",
 		.pm = pm_sleep_ptr(&pm8941_pwr_key_pm_ops),
diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c
index e79f5497948b..08bcee3d6bcc 100644
--- a/drivers/input/misc/soc_button_array.c
+++ b/drivers/input/misc/soc_button_array.c
@@ -411,7 +411,7 @@ out:
 	return button_info;
 }
 
-static int soc_button_remove(struct platform_device *pdev)
+static void soc_button_remove(struct platform_device *pdev)
 {
 	struct soc_button_data *priv = platform_get_drvdata(pdev);
 
@@ -420,8 +420,6 @@ static int soc_button_remove(struct platform_device *pdev)
 	for (i = 0; i < BUTTON_TYPES; i++)
 		if (priv->children[i])
 			platform_device_unregister(priv->children[i]);
-
-	return 0;
 }
 
 static int soc_button_probe(struct platform_device *pdev)
@@ -609,7 +607,7 @@ MODULE_DEVICE_TABLE(acpi, soc_button_acpi_match);
 
 static struct platform_driver soc_button_driver = {
 	.probe = soc_button_probe,
-	.remove = soc_button_remove,
+	.remove_new = soc_button_remove,
 	.driver = {
 		.name = KBUILD_MODNAME,
 		.acpi_match_table = ACPI_PTR(soc_button_acpi_match),
diff --git a/drivers/input/misc/sparcspkr.c b/drivers/input/misc/sparcspkr.c
index e5dd84725c6e..20020cbc0752 100644
--- a/drivers/input/misc/sparcspkr.c
+++ b/drivers/input/misc/sparcspkr.c
@@ -231,7 +231,7 @@ out_err:
 	return err;
 }
 
-static int bbc_remove(struct platform_device *op)
+static void bbc_remove(struct platform_device *op)
 {
 	struct sparcspkr_state *state = platform_get_drvdata(op);
 	struct input_dev *input_dev = state->input_dev;
@@ -245,8 +245,6 @@ static int bbc_remove(struct platform_device *op)
 	of_iounmap(&op->resource[0], info->regs, 6);
 
 	kfree(state);
-
-	return 0;
 }
 
 static const struct of_device_id bbc_beep_match[] = {
@@ -264,7 +262,7 @@ static struct platform_driver bbc_beep_driver = {
 		.of_match_table = bbc_beep_match,
 	},
 	.probe = bbc_beep_probe,
-	.remove = bbc_remove,
+	.remove_new = bbc_remove,
 	.shutdown = sparcspkr_shutdown,
 };
 
@@ -310,7 +308,7 @@ out_err:
 	return err;
 }
 
-static int grover_remove(struct platform_device *op)
+static void grover_remove(struct platform_device *op)
 {
 	struct sparcspkr_state *state = platform_get_drvdata(op);
 	struct grover_beep_info *info = &state->u.grover;
@@ -325,8 +323,6 @@ static int grover_remove(struct platform_device *op)
 	of_iounmap(&op->resource[2], info->freq_regs, 2);
 
 	kfree(state);
-
-	return 0;
 }
 
 static const struct of_device_id grover_beep_match[] = {
@@ -344,7 +340,7 @@ static struct platform_driver grover_beep_driver = {
 		.of_match_table = grover_beep_match,
 	},
 	.probe = grover_beep_probe,
-	.remove = grover_remove,
+	.remove_new = grover_remove,
 	.shutdown = sparcspkr_shutdown,
 };
diff --git a/drivers/input/misc/wistron_btns.c b/drivers/input/misc/wistron_btns.c
index 111cb70cde46..5c4956678cd0 100644
--- a/drivers/input/misc/wistron_btns.c
+++ b/drivers/input/misc/wistron_btns.c
@@ -1286,13 +1286,11 @@ static int wistron_probe(struct platform_device *dev)
 	return 0;
 }
 
-static int wistron_remove(struct platform_device *dev)
+static void wistron_remove(struct platform_device *dev)
 {
 	wistron_led_remove();
 	input_unregister_device(wistron_idev);
 	bios_detach();
-
-	return 0;
 }
 
 static int wistron_suspend(struct device *dev)
@@ -1336,7 +1334,7 @@ static struct platform_driver wistron_driver = {
 		.pm = pm_sleep_ptr(&wistron_pm_ops),
 	},
 	.probe = wistron_probe,
-	.remove = wistron_remove,
+	.remove_new = wistron_remove,
 };
 
 static int __init wb_module_init(void)
diff --git a/drivers/input/misc/wm831x-on.c b/drivers/input/misc/wm831x-on.c
index a42fe041b73c..e4a06c73b72d 100644
--- a/drivers/input/misc/wm831x-on.c
+++ b/drivers/input/misc/wm831x-on.c
@@ -123,20 +123,18 @@ err:
 	return ret;
 }
 
-static int wm831x_on_remove(struct platform_device *pdev)
+static void wm831x_on_remove(struct platform_device *pdev)
 {
 	struct wm831x_on *wm831x_on = platform_get_drvdata(pdev);
 	int irq = platform_get_irq(pdev, 0);
 
 	free_irq(irq, wm831x_on);
 	cancel_delayed_work_sync(&wm831x_on->work);
-
-	return 0;
 }
 
 static struct platform_driver wm831x_on_driver = {
 	.probe = wm831x_on_probe,
-	.remove = wm831x_on_remove,
+	.remove_new = wm831x_on_remove,
 	.driver = {
 		.name = "wm831x-on",
 	},
diff --git a/drivers/input/mouse/cyapa.c b/drivers/input/mouse/cyapa.c
index 05851bc32541..a84098448f5b 100644
--- a/drivers/input/mouse/cyapa.c
+++ b/drivers/input/mouse/cyapa.c
@@ -1223,7 +1223,7 @@ static DEVICE_ATTR(baseline, S_IRUGO, cyapa_show_baseline, NULL);
 static DEVICE_ATTR(calibrate, S_IWUSR, NULL, cyapa_calibrate_store);
 static DEVICE_ATTR(mode, S_IRUGO, cyapa_show_mode, NULL);
 
-static struct attribute *cyapa_sysfs_entries[] = {
+static struct attribute *cyapa_attrs[] = {
 	&dev_attr_firmware_version.attr,
 	&dev_attr_product_id.attr,
 	&dev_attr_update_fw.attr,
@@ -1232,10 +1232,7 @@ static struct attribute *cyapa_sysfs_entries[] = {
 	&dev_attr_mode.attr,
 	NULL,
 };
-
-static const struct attribute_group cyapa_sysfs_group = {
-	.attrs = cyapa_sysfs_entries,
-};
+ATTRIBUTE_GROUPS(cyapa);
 
 static void cyapa_disable_regulator(void *data)
 {
@@ -1302,12 +1299,6 @@ static int cyapa_probe(struct i2c_client *client)
 		return error;
 	}
 
-	error = devm_device_add_group(dev, &cyapa_sysfs_group);
-	if (error) {
-		dev_err(dev, "failed to create sysfs entries: %d\n", error);
-		return error;
-	}
-
 	error = cyapa_prepare_wakeup_controls(cyapa);
 	if (error) {
 		dev_err(dev, "failed to prepare wakeup controls: %d\n", error);
@@ -1484,6 +1475,7 @@ MODULE_DEVICE_TABLE(of, cyapa_of_match);
 static struct i2c_driver cyapa_driver = {
 	.driver = {
 		.name = "cyapa",
+		.dev_groups = cyapa_groups,
 		.pm = pm_ptr(&cyapa_pm_ops),
 		.acpi_match_table = ACPI_PTR(cyapa_acpi_id),
 		.of_match_table = of_match_ptr(cyapa_of_match),
diff --git a/drivers/input/mouse/navpoint.c b/drivers/input/mouse/navpoint.c
index 2b7b86eef280..c00dc1275da2 100644
--- a/drivers/input/mouse/navpoint.c
+++ b/drivers/input/mouse/navpoint.c
@@ -295,7 +295,7 @@ err_free_gpio:
 	return error;
 }
 
-static int navpoint_remove(struct platform_device *pdev)
+static void navpoint_remove(struct platform_device *pdev)
 {
 	const struct navpoint_platform_data *pdata =
 					dev_get_platdata(&pdev->dev);
@@ -311,8 +311,6 @@ static int navpoint_remove(struct platform_device *pdev)
 
 	if (gpio_is_valid(pdata->gpio))
 		gpio_free(pdata->gpio);
-
-	return 0;
 }
 
 static int navpoint_suspend(struct device *dev)
@@ -348,7 +346,7 @@ static DEFINE_SIMPLE_DEV_PM_OPS(navpoint_pm_ops,
 
 static struct platform_driver navpoint_driver = {
 	.probe = navpoint_probe,
-	.remove = navpoint_remove,
+	.remove_new = navpoint_remove,
 	.driver = {
 		.name = "navpoint",
 		.pm = pm_sleep_ptr(&navpoint_pm_ops),
diff --git a/drivers/input/rmi4/rmi_bus.c b/drivers/input/rmi4/rmi_bus.c
index f2e093b0b998..1b45b1d3077d 100644
--- a/drivers/input/rmi4/rmi_bus.c
+++ b/drivers/input/rmi4/rmi_bus.c
@@ -277,11 +277,11 @@ void rmi_unregister_function(struct rmi_function *fn)
 	device_del(&fn->dev);
 	of_node_put(fn->dev.of_node);
-	put_device(&fn->dev);
 
 	for (i = 0; i < fn->num_of_irqs; i++)
 		irq_dispose_mapping(fn->irq[i]);
 
+	put_device(&fn->dev);
 }
 
 /**
diff --git a/drivers/input/rmi4/rmi_f34.c b/drivers/input/rmi4/rmi_f34.c
index 0d9a5756e3f5..3b3ac71e53dc 100644
--- a/drivers/input/rmi4/rmi_f34.c
+++ b/drivers/input/rmi4/rmi_f34.c
@@ -471,7 +471,7 @@ static ssize_t rmi_driver_update_fw_store(struct device *dev,
 	if (buf[count - 1] == '\0' || buf[count - 1] == '\n')
 		copy_count -= 1;
 
-	strncpy(fw_name, buf, copy_count);
+	memcpy(fw_name, buf, copy_count);
 	fw_name[copy_count] = '\0';
 
 	ret = request_firmware(&fw, fw_name, dev);
diff --git a/drivers/input/serio/altera_ps2.c b/drivers/input/serio/altera_ps2.c
index 9f8d7b332d1b..c5b634940cfc 100644
--- a/drivers/input/serio/altera_ps2.c
+++ b/drivers/input/serio/altera_ps2.c
@@ -125,13 +125,11 @@ static int altera_ps2_probe(struct platform_device *pdev)
 /*
  * Remove one device from this driver.
  */
-static int altera_ps2_remove(struct platform_device *pdev)
+static void altera_ps2_remove(struct platform_device *pdev)
 {
 	struct ps2if *ps2if = platform_get_drvdata(pdev);
 
 	serio_unregister_port(ps2if->io);
-
-	return 0;
 }
 
 #ifdef CONFIG_OF
@@ -148,7 +146,7 @@ MODULE_DEVICE_TABLE(of, altera_ps2_match);
 */
 static struct platform_driver altera_ps2_driver = {
 	.probe = altera_ps2_probe,
-	.remove = altera_ps2_remove,
+	.remove_new = altera_ps2_remove,
 	.driver = {
 		.name = DRV_NAME,
 		.of_match_table = of_match_ptr(altera_ps2_match),
diff --git a/drivers/input/serio/ams_delta_serio.c b/drivers/input/serio/ams_delta_serio.c
index ec93cb4573c3..0bd6ae106809 100644
--- a/drivers/input/serio/ams_delta_serio.c
+++ b/drivers/input/serio/ams_delta_serio.c
@@ -173,18 +173,16 @@ static int ams_delta_serio_init(struct platform_device *pdev)
 	return 0;
 }
 
-static int ams_delta_serio_exit(struct platform_device *pdev)
+static void ams_delta_serio_exit(struct platform_device *pdev)
 {
 	struct ams_delta_serio *priv = platform_get_drvdata(pdev);
 
 	serio_unregister_port(priv->serio);
-
-	return 0;
 }
 
 static struct platform_driver ams_delta_serio_driver = {
 	.probe = ams_delta_serio_init,
-	.remove = ams_delta_serio_exit,
+	.remove_new = ams_delta_serio_exit,
 	.driver = {
 		.name = DRIVER_NAME
 	},
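The rmi_bus hunk above is an ordering fix rather than a prototype change: put_device() may drop the last reference and free the function object, so disposing the IRQ mappings stored inside it has to happen first. The general teardown rule, sketched on a hypothetical structure:

/* hypothetical teardown; foo_function stands in for rmi_function */
void foo_unregister(struct foo_function *fn)
{
	int i;

	device_del(&fn->dev);

	/* release everything that lives inside *fn first... */
	for (i = 0; i < fn->num_irqs; i++)
		irq_dispose_mapping(fn->irq[i]);

	/* ...and only then drop the reference that may free fn */
	put_device(&fn->dev);
}

The neighbouring rmi_f34 hunk swaps strncpy() for memcpy(): copy_count is already bounded and the NUL terminator is written explicitly, so the padding and truncation semantics of strncpy() buy nothing here.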
diff --git a/drivers/input/serio/apbps2.c b/drivers/input/serio/apbps2.c
index 3f6866d39b86..dbbb10251520 100644
--- a/drivers/input/serio/apbps2.c
+++ b/drivers/input/serio/apbps2.c
@@ -187,13 +187,11 @@ static int apbps2_of_probe(struct platform_device *ofdev)
 	return 0;
 }
 
-static int apbps2_of_remove(struct platform_device *of_dev)
+static void apbps2_of_remove(struct platform_device *of_dev)
 {
 	struct apbps2_priv *priv = platform_get_drvdata(of_dev);
 
 	serio_unregister_port(priv->io);
-
-	return 0;
 }
 
 static const struct of_device_id apbps2_of_match[] = {
@@ -210,7 +208,7 @@ static struct platform_driver apbps2_of_driver = {
 		.of_match_table = apbps2_of_match,
 	},
 	.probe = apbps2_of_probe,
-	.remove = apbps2_of_remove,
+	.remove_new = apbps2_of_remove,
 };
 
 module_platform_driver(apbps2_of_driver);
diff --git a/drivers/input/serio/arc_ps2.c b/drivers/input/serio/arc_ps2.c
index a6debb13d527..9d8726830140 100644
--- a/drivers/input/serio/arc_ps2.c
+++ b/drivers/input/serio/arc_ps2.c
@@ -232,7 +232,7 @@ static int arc_ps2_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int arc_ps2_remove(struct platform_device *pdev)
+static void arc_ps2_remove(struct platform_device *pdev)
 {
 	struct arc_ps2_data *arc_ps2 = platform_get_drvdata(pdev);
 	int i;
@@ -244,8 +244,6 @@ static int arc_ps2_remove(struct platform_device *pdev)
 	dev_dbg(&pdev->dev, "frame error count = %i\n", arc_ps2->frame_error);
 	dev_dbg(&pdev->dev, "buffer overflow count = %i\n",
 		arc_ps2->buf_overflow);
-
-	return 0;
 }
 
 #ifdef CONFIG_OF
@@ -262,7 +260,7 @@ static struct platform_driver arc_ps2_driver = {
 		.of_match_table = of_match_ptr(arc_ps2_match),
 	},
 	.probe = arc_ps2_probe,
-	.remove = arc_ps2_remove,
+	.remove_new = arc_ps2_remove,
 };
 
 module_platform_driver(arc_ps2_driver);
diff --git a/drivers/input/serio/ct82c710.c b/drivers/input/serio/ct82c710.c
index 3da751f4a6bf..d5c9bb3d0103 100644
--- a/drivers/input/serio/ct82c710.c
+++ b/drivers/input/serio/ct82c710.c
@@ -180,11 +180,9 @@ static int ct82c710_probe(struct platform_device *dev)
 	return 0;
 }
 
-static int ct82c710_remove(struct platform_device *dev)
+static void ct82c710_remove(struct platform_device *dev)
 {
 	serio_unregister_port(ct82c710_port);
-
-	return 0;
 }
 
 static struct platform_driver ct82c710_driver = {
@@ -192,7 +190,7 @@ static struct platform_driver ct82c710_driver = {
 		.name = "ct82c710",
 	},
 	.probe = ct82c710_probe,
-	.remove = ct82c710_remove,
+	.remove_new = ct82c710_remove,
 };
diff --git a/drivers/input/serio/i8042-sparcio.h b/drivers/input/serio/i8042-sparcio.h
index b68793bf05c8..c2fda54dc384 100644
--- a/drivers/input/serio/i8042-sparcio.h
+++ b/drivers/input/serio/i8042-sparcio.h
@@ -82,11 +82,9 @@ static int sparc_i8042_probe(struct platform_device *op)
 	return 0;
 }
 
-static int sparc_i8042_remove(struct platform_device *op)
+static void sparc_i8042_remove(struct platform_device *op)
 {
 	of_iounmap(kbd_res, kbd_iobase, 8);
-
-	return 0;
 }
 
 static const struct of_device_id sparc_i8042_match[] = {
@@ -103,7 +101,7 @@ static struct platform_driver sparc_i8042_driver = {
 		.of_match_table = sparc_i8042_match,
 	},
 	.probe = sparc_i8042_probe,
-	.remove = sparc_i8042_remove,
+	.remove_new = sparc_i8042_remove,
 };
 
 static bool i8042_is_mr_coffee(void)
diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index 6dac7c1853a5..9fbb8d31575a 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -1584,13 +1584,11 @@ static int i8042_probe(struct platform_device *dev)
 	return error;
 }
 
-static int i8042_remove(struct platform_device *dev)
+static void i8042_remove(struct platform_device *dev)
 {
 	i8042_unregister_ports();
 	i8042_free_irqs();
 	i8042_controller_reset(false);
-
-	return 0;
 }
 
 static struct platform_driver i8042_driver = {
@@ -1601,7 +1599,7 @@ static struct platform_driver i8042_driver = {
 #endif
 	},
 	.probe = i8042_probe,
-	.remove = i8042_remove,
+	.remove_new = i8042_remove,
 	.shutdown = i8042_shutdown,
 };
diff --git a/drivers/input/serio/ioc3kbd.c b/drivers/input/serio/ioc3kbd.c
index d51bfe912db5..50552dc7b4f5 100644
--- a/drivers/input/serio/ioc3kbd.c
+++ b/drivers/input/serio/ioc3kbd.c
@@ -190,7 +190,7 @@ static int ioc3kbd_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int ioc3kbd_remove(struct platform_device *pdev)
+static void ioc3kbd_remove(struct platform_device *pdev)
 {
 	struct ioc3kbd_data *d = platform_get_drvdata(pdev);
 
@@ -198,13 +198,11 @@ static int ioc3kbd_remove(struct platform_device *pdev)
 	serio_unregister_port(d->kbd);
 	serio_unregister_port(d->aux);
-
-	return 0;
 }
 
 static struct platform_driver ioc3kbd_driver = {
 	.probe = ioc3kbd_probe,
-	.remove = ioc3kbd_remove,
+	.remove_new = ioc3kbd_remove,
 	.driver = {
 		.name = "ioc3-kbd",
 	},
diff --git a/drivers/input/serio/maceps2.c b/drivers/input/serio/maceps2.c
index 629e15089c21..5ccfb82759b3 100644
--- a/drivers/input/serio/maceps2.c
+++ b/drivers/input/serio/maceps2.c
@@ -148,12 +148,10 @@ static int maceps2_probe(struct platform_device *dev)
 	return 0;
 }
 
-static int maceps2_remove(struct platform_device *dev)
+static void maceps2_remove(struct platform_device *dev)
 {
 	serio_unregister_port(maceps2_port[0]);
 	serio_unregister_port(maceps2_port[1]);
-
-	return 0;
 }
 
 static struct platform_driver maceps2_driver = {
@@ -161,7 +159,7 @@ static struct platform_driver maceps2_driver = {
 		.name = "maceps2",
 	},
 	.probe = maceps2_probe,
-	.remove = maceps2_remove,
+	.remove_new = maceps2_remove,
 };
 
 static int __init maceps2_init(void)
diff --git a/drivers/input/serio/olpc_apsp.c b/drivers/input/serio/olpc_apsp.c
index 33a8e5889bd8..240a714f7081 100644
--- a/drivers/input/serio/olpc_apsp.c
+++ b/drivers/input/serio/olpc_apsp.c
@@ -238,7 +238,7 @@ err_pad:
 	return error;
 }
 
-static int olpc_apsp_remove(struct platform_device *pdev)
+static void olpc_apsp_remove(struct platform_device *pdev)
 {
 	struct olpc_apsp *priv = platform_get_drvdata(pdev);
 
@@ -246,8 +246,6 @@ static int olpc_apsp_remove(struct platform_device *pdev)
 	serio_unregister_port(priv->kbio);
 	serio_unregister_port(priv->padio);
-
-	return 0;
 }
 
 static const struct of_device_id olpc_apsp_dt_ids[] = {
@@ -258,7 +256,7 @@ MODULE_DEVICE_TABLE(of, olpc_apsp_dt_ids);
 
 static struct platform_driver olpc_apsp_driver = {
 	.probe = olpc_apsp_probe,
-	.remove = olpc_apsp_remove,
+	.remove_new = olpc_apsp_remove,
 	.driver = {
 		.name = "olpc-apsp",
 		.of_match_table = olpc_apsp_dt_ids,
diff --git a/drivers/input/serio/ps2-gpio.c b/drivers/input/serio/ps2-gpio.c
index bc1dc484389b..c3ff60859a03 100644
--- a/drivers/input/serio/ps2-gpio.c
+++ b/drivers/input/serio/ps2-gpio.c
@@ -476,12 +476,11 @@ err_free_serio:
 	return error;
 }
 
-static int ps2_gpio_remove(struct platform_device *pdev)
+static void ps2_gpio_remove(struct platform_device *pdev)
 {
 	struct ps2_gpio_data *drvdata = platform_get_drvdata(pdev);
 
 	serio_unregister_port(drvdata->serio);
-	return 0;
 }
 
 #if defined(CONFIG_OF)
@@ -494,7 +493,7 @@ MODULE_DEVICE_TABLE(of, ps2_gpio_match);
 
 static struct platform_driver ps2_gpio_driver = {
 	.probe = ps2_gpio_probe,
-	.remove = ps2_gpio_remove,
+	.remove_new = ps2_gpio_remove,
 	.driver = {
 		.name = DRIVER_NAME,
 		.of_match_table = of_match_ptr(ps2_gpio_match),
diff --git a/drivers/input/serio/q40kbd.c b/drivers/input/serio/q40kbd.c
index ba04058fc3cb..3f81f8749cd5 100644
--- a/drivers/input/serio/q40kbd.c
+++ b/drivers/input/serio/q40kbd.c
@@ -148,7 +148,7 @@ err_free_mem:
 	return error;
 }
 
-static int q40kbd_remove(struct platform_device *pdev)
+static void q40kbd_remove(struct platform_device *pdev)
 {
 	struct q40kbd *q40kbd = platform_get_drvdata(pdev);
 
@@ -160,15 +160,13 @@ static int q40kbd_remove(struct platform_device *pdev)
 	serio_unregister_port(q40kbd->port);
 	free_irq(Q40_IRQ_KEYBOARD, q40kbd);
 	kfree(q40kbd);
-
-	return 0;
 }
 
 static struct platform_driver q40kbd_driver = {
 	.driver = {
 		.name = "q40kbd",
 	},
-	.remove = q40kbd_remove,
+	.remove_new = q40kbd_remove,
 };
 
 module_platform_driver_probe(q40kbd_driver, q40kbd_probe);
diff --git a/drivers/input/serio/rpckbd.c b/drivers/input/serio/rpckbd.c
index e8a9709f32eb..9bbfefd092c0 100644
--- a/drivers/input/serio/rpckbd.c
+++ b/drivers/input/serio/rpckbd.c
@@ -133,20 +133,18 @@ static int rpckbd_probe(struct platform_device *dev)
 	return 0;
 }
 
-static int rpckbd_remove(struct platform_device *dev)
+static void rpckbd_remove(struct platform_device *dev)
 {
 	struct serio *serio = platform_get_drvdata(dev);
 	struct rpckbd_data *rpckbd = serio->port_data;
 
 	serio_unregister_port(serio);
 	kfree(rpckbd);
-
-	return 0;
 }
 
 static struct platform_driver rpckbd_driver = {
 	.probe = rpckbd_probe,
-	.remove = rpckbd_remove,
+	.remove_new = rpckbd_remove,
 	.driver = {
 		.name = "kart",
 	},
diff --git a/drivers/input/serio/sun4i-ps2.c b/drivers/input/serio/sun4i-ps2.c
index eb262640192e..aec66d9f5176 100644
--- a/drivers/input/serio/sun4i-ps2.c
+++ b/drivers/input/serio/sun4i-ps2.c
@@ -297,7 +297,7 @@ err_free_mem:
 	return error;
 }
 
-static int sun4i_ps2_remove(struct platform_device *pdev)
+static void sun4i_ps2_remove(struct platform_device *pdev)
 {
 	struct sun4i_ps2data *drvdata = platform_get_drvdata(pdev);
 
@@ -311,8 +311,6 @@ static int sun4i_ps2_remove(struct platform_device *pdev)
 	iounmap(drvdata->reg_base);
 
 	kfree(drvdata);
-
-	return 0;
 }
 
 static const struct of_device_id sun4i_ps2_match[] = {
@@ -324,7 +322,7 @@ MODULE_DEVICE_TABLE(of, sun4i_ps2_match);
 
 static struct platform_driver sun4i_ps2_driver = {
 	.probe = sun4i_ps2_probe,
-	.remove = sun4i_ps2_remove,
+	.remove_new = sun4i_ps2_remove,
 	.driver = {
 		.name = DRIVER_NAME,
 		.of_match_table = sun4i_ps2_match,
diff --git a/drivers/input/serio/xilinx_ps2.c b/drivers/input/serio/xilinx_ps2.c
index f3d28da70b75..d8f9faf2b529 100644
--- a/drivers/input/serio/xilinx_ps2.c
+++ b/drivers/input/serio/xilinx_ps2.c
@@ -329,7 +329,7 @@ failed1:
 * if the driver module is being unloaded. It frees any resources allocated to
 * the device.
 */
-static int xps2_of_remove(struct platform_device *of_dev)
+static void xps2_of_remove(struct platform_device *of_dev)
 {
 	struct xps2data *drvdata = platform_get_drvdata(of_dev);
 	struct resource r_mem; /* IO mem resources */
@@ -344,8 +344,6 @@ static int xps2_of_remove(struct platform_device *of_dev)
 	release_mem_region(r_mem.start, resource_size(&r_mem));
 
 	kfree(drvdata);
-
-	return 0;
 }
 
 /* Match table for of_platform binding */
@@ -361,7 +359,7 @@ static struct platform_driver xps2_of_driver = {
 		.of_match_table = xps2_of_match,
 	},
 	.probe = xps2_of_probe,
-	.remove = xps2_of_remove,
+	.remove_new = xps2_of_remove,
 };
 
 module_platform_driver(xps2_of_driver);
diff --git a/drivers/input/touchscreen/ad7877.c b/drivers/input/touchscreen/ad7877.c
index edb36d663f22..a0598e9c7aff 100644
--- a/drivers/input/touchscreen/ad7877.c
+++ b/drivers/input/touchscreen/ad7877.c
@@ -612,10 +612,11 @@ static umode_t ad7877_attr_is_visible(struct kobject *kobj,
 	return mode;
 }
 
-static const struct attribute_group ad7877_attr_group = {
+static const struct attribute_group ad7877_group = {
 	.is_visible = ad7877_attr_is_visible,
 	.attrs = ad7877_attributes,
 };
+__ATTRIBUTE_GROUPS(ad7877);
 
 static void ad7877_setup_ts_def_msg(struct spi_device *spi, struct ad7877 *ts)
 {
@@ -777,10 +778,6 @@ static int ad7877_probe(struct spi_device *spi)
 		return err;
 	}
 
-	err = devm_device_add_group(&spi->dev, &ad7877_attr_group);
-	if (err)
-		return err;
-
 	err = input_register_device(input_dev);
 	if (err)
 		return err;
@@ -810,8 +807,9 @@ static DEFINE_SIMPLE_DEV_PM_OPS(ad7877_pm, ad7877_suspend, ad7877_resume);
 
 static struct spi_driver ad7877_driver = {
 	.driver = {
-		.name = "ad7877",
-		.pm = pm_sleep_ptr(&ad7877_pm),
+		.name = "ad7877",
+		.dev_groups = ad7877_groups,
+		.pm = pm_sleep_ptr(&ad7877_pm),
 	},
 	.probe = ad7877_probe,
 };
diff --git a/drivers/input/touchscreen/ad7879-i2c.c b/drivers/input/touchscreen/ad7879-i2c.c
index feaa6f8b01ed..5c094ab74698 100644
--- a/drivers/input/touchscreen/ad7879-i2c.c
+++ b/drivers/input/touchscreen/ad7879-i2c.c
@@ -58,9 +58,10 @@ MODULE_DEVICE_TABLE(of, ad7879_i2c_dt_ids);
 
 static struct i2c_driver ad7879_i2c_driver = {
 	.driver = {
-		.name = "ad7879",
-		.pm = &ad7879_pm_ops,
-		.of_match_table = of_match_ptr(ad7879_i2c_dt_ids),
+		.name = "ad7879",
+		.dev_groups = ad7879_groups,
+		.pm = &ad7879_pm_ops,
+		.of_match_table = of_match_ptr(ad7879_i2c_dt_ids),
 	},
 	.probe = ad7879_i2c_probe,
 	.id_table = ad7879_id,
diff --git a/drivers/input/touchscreen/ad7879-spi.c b/drivers/input/touchscreen/ad7879-spi.c
index 50e889846800..064968fe57cf 100644
--- a/drivers/input/touchscreen/ad7879-spi.c
+++ b/drivers/input/touchscreen/ad7879-spi.c
@@ -56,9 +56,10 @@ MODULE_DEVICE_TABLE(of, ad7879_spi_dt_ids);
 
 static struct spi_driver ad7879_spi_driver = {
 	.driver = {
-		.name = "ad7879",
-		.pm = &ad7879_pm_ops,
-		.of_match_table = of_match_ptr(ad7879_spi_dt_ids),
+		.name = "ad7879",
+		.dev_groups = ad7879_groups,
+		.pm = &ad7879_pm_ops,
+		.of_match_table = of_match_ptr(ad7879_spi_dt_ids),
 	},
 	.probe = ad7879_spi_probe,
 };
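The ad7879 driver is split into a core module plus I2C and SPI glue modules, and .dev_groups has to be set in each glue driver's struct. The core therefore exports the NULL-terminated group array (see the ad7879.c and ad7879.h hunks below), and both bus drivers reference it. A sketch of the split, with a hypothetical "foo" core/glue pair:

/* core module */
const struct attribute_group *foo_groups[] = {
	&foo_attr_group,
	NULL
};
EXPORT_SYMBOL_GPL(foo_groups);

/* shared header */
extern const struct attribute_group *foo_groups[];

/* bus glue module */
static struct i2c_driver foo_i2c_driver = {
	.driver = {
		.name = "foo",
		.dev_groups = foo_groups,
	},
	.probe = foo_i2c_probe,
};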
diff --git a/drivers/input/touchscreen/ad7879.c b/drivers/input/touchscreen/ad7879.c
index e850853328f1..e5d69bf2276e 100644
--- a/drivers/input/touchscreen/ad7879.c
+++ b/drivers/input/touchscreen/ad7879.c
@@ -391,6 +391,12 @@ static const struct attribute_group ad7879_attr_group = {
 	.attrs = ad7879_attributes,
 };
 
+const struct attribute_group *ad7879_groups[] = {
+	&ad7879_attr_group,
+	NULL
+};
+EXPORT_SYMBOL_GPL(ad7879_groups);
+
 #ifdef CONFIG_GPIOLIB
 static int ad7879_gpio_direction_input(struct gpio_chip *chip, unsigned gpio)
@@ -612,10 +618,6 @@ int ad7879_probe(struct device *dev, struct regmap *regmap,
 
 	__ad7879_disable(ts);
 
-	err = devm_device_add_group(dev, &ad7879_attr_group);
-	if (err)
-		return err;
-
 	err = ad7879_gpio_add(ts);
 	if (err)
 		return err;
diff --git a/drivers/input/touchscreen/ad7879.h b/drivers/input/touchscreen/ad7879.h
index ae8aa1428e56..d71a8e787290 100644
--- a/drivers/input/touchscreen/ad7879.h
+++ b/drivers/input/touchscreen/ad7879.h
@@ -8,11 +8,14 @@
 #ifndef _AD7879_H_
 #define _AD7879_H_
 
+#include <linux/pm.h>
 #include <linux/types.h>
 
+struct attribute_group;
 struct device;
 struct regmap;
 
+extern const struct attribute_group *ad7879_groups[];
 extern const struct dev_pm_ops ad7879_pm_ops;
 
 int ad7879_probe(struct device *dev, struct regmap *regmap,
diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index faea40dd66d0..d2bbb436a77d 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -625,15 +625,12 @@ static ssize_t ads7846_disable_store(struct device *dev,
 
 static DEVICE_ATTR(disable, 0664, ads7846_disable_show, ads7846_disable_store);
 
-static struct attribute *ads784x_attributes[] = {
+static struct attribute *ads784x_attrs[] = {
 	&dev_attr_pen_down.attr,
 	&dev_attr_disable.attr,
 	NULL,
 };
-
-static const struct attribute_group ads784x_attr_group = {
-	.attrs = ads784x_attributes,
-};
+ATTRIBUTE_GROUPS(ads784x);
 
 /*--------------------------------------------------------------------------*/
 
@@ -1357,10 +1354,6 @@ static int ads7846_probe(struct spi_device *spi)
 	else
 		(void) ads7846_read12_ser(dev, READ_12BIT_SER(vaux));
 
-	err = devm_device_add_group(dev, &ads784x_attr_group);
-	if (err)
-		return err;
-
 	err = input_register_device(input_dev);
 	if (err)
 		return err;
@@ -1386,9 +1379,10 @@ static void ads7846_remove(struct spi_device *spi)
 
 static struct spi_driver ads7846_driver = {
 	.driver = {
-		.name = "ads7846",
-		.pm = pm_sleep_ptr(&ads7846_pm),
-		.of_match_table = ads7846_dt_ids,
+		.name = "ads7846",
+		.dev_groups = ads784x_groups,
+		.pm = pm_sleep_ptr(&ads7846_pm),
+		.of_match_table = ads7846_dt_ids,
 	},
 	.probe = ads7846_probe,
 	.remove = ads7846_remove,
diff --git a/drivers/input/touchscreen/cyttsp5.c b/drivers/input/touchscreen/cyttsp5.c
index db5a885ecd72..68527ede5c0e 100644
--- a/drivers/input/touchscreen/cyttsp5.c
+++ b/drivers/input/touchscreen/cyttsp5.c
@@ -207,7 +207,7 @@ struct cyttsp5 {
 	int num_prv_rec;
 	struct regmap *regmap;
 	struct touchscreen_properties prop;
-	struct regulator *vdd;
+	struct regulator_bulk_data supplies[2];
 };
 
 /*
@@ -817,7 +817,7 @@ static void cyttsp5_cleanup(void *data)
 {
 	struct cyttsp5 *ts = data;
 
-	regulator_disable(ts->vdd);
+	regulator_bulk_disable(ARRAY_SIZE(ts->supplies), ts->supplies);
 }
 
 static int cyttsp5_probe(struct device *dev, struct regmap *regmap, int irq,
@@ -840,9 +840,12 @@ static int cyttsp5_probe(struct device *dev, struct regmap *regmap, int irq,
 	init_completion(&ts->cmd_done);
 
 	/* Power up the device */
-	ts->vdd = devm_regulator_get(dev, "vdd");
-	if (IS_ERR(ts->vdd)) {
-		error = PTR_ERR(ts->vdd);
+	ts->supplies[0].supply = "vdd";
+	ts->supplies[1].supply = "vddio";
+	error = devm_regulator_bulk_get(dev, ARRAY_SIZE(ts->supplies),
+					ts->supplies);
+	if (error) {
+		dev_err(ts->dev, "Failed to get regulators, error %d\n", error);
 		return error;
 	}
 
@@ -850,9 +853,11 @@ static int cyttsp5_probe(struct device *dev, struct regmap *regmap, int irq,
 	if (error)
 		return error;
 
-	error = regulator_enable(ts->vdd);
-	if (error)
+	error = regulator_bulk_enable(ARRAY_SIZE(ts->supplies), ts->supplies);
+	if (error) {
+		dev_err(ts->dev, "Failed to enable regulators, error %d\n", error);
 		return error;
+	}
 
 	ts->input = devm_input_allocate_device(dev);
 	if (!ts->input) {
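The cyttsp5 hunks above move from a single devm_regulator_get("vdd") to the bulk regulator API so that vdd and vddio are acquired, enabled, and disabled as a set. The idiom, sketched on a hypothetical "foo" device with a struct foo { struct regulator_bulk_data supplies[2]; ... } state:

static int foo_power_up(struct device *dev, struct foo *ts)
{
	int error;

	/* one call fetches all named supplies; devm handles the release */
	ts->supplies[0].supply = "vdd";
	ts->supplies[1].supply = "vddio";
	error = devm_regulator_bulk_get(dev, ARRAY_SIZE(ts->supplies),
					ts->supplies);
	if (error)
		return error;

	/* enables in array order; disables everything again on failure */
	return regulator_bulk_enable(ARRAY_SIZE(ts->supplies), ts->supplies);
}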
diff --git a/drivers/input/touchscreen/da9052_tsi.c b/drivers/input/touchscreen/da9052_tsi.c
index f91d0e02ddae..d71690ce6463 100644
--- a/drivers/input/touchscreen/da9052_tsi.c
+++ b/drivers/input/touchscreen/da9052_tsi.c
@@ -311,7 +311,7 @@ err_free_mem:
 	return error;
 }
 
-static int da9052_ts_remove(struct platform_device *pdev)
+static void da9052_ts_remove(struct platform_device *pdev)
 {
 	struct da9052_tsi *tsi = platform_get_drvdata(pdev);
 
@@ -322,13 +322,11 @@ static int da9052_ts_remove(struct platform_device *pdev)
 
 	input_unregister_device(tsi->dev);
 	kfree(tsi);
-
-	return 0;
 }
 
 static struct platform_driver da9052_tsi_driver = {
 	.probe = da9052_ts_probe,
-	.remove = da9052_ts_remove,
+	.remove_new = da9052_ts_remove,
 	.driver = {
 		.name = "da9052-tsi",
 	},
diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c
index 457d53337fbb..3e102bcc4a1c 100644
--- a/drivers/input/touchscreen/edt-ft5x06.c
+++ b/drivers/input/touchscreen/edt-ft5x06.c
@@ -580,10 +580,7 @@ static struct attribute *edt_ft5x06_attrs[] = {
 	&dev_attr_crc_errors.attr,
 	NULL
 };
-
-static const struct attribute_group edt_ft5x06_attr_group = {
-	.attrs = edt_ft5x06_attrs,
-};
+ATTRIBUTE_GROUPS(edt_ft5x06);
 
 static void edt_ft5x06_restore_reg_parameters(struct edt_ft5x06_ts_data *tsdata)
 {
@@ -1330,10 +1327,6 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client)
 		return error;
 	}
 
-	error = devm_device_add_group(&client->dev, &edt_ft5x06_attr_group);
-	if (error)
-		return error;
-
 	error = input_register_device(input);
 	if (error)
 		return error;
@@ -1502,6 +1495,7 @@ MODULE_DEVICE_TABLE(of, edt_ft5x06_of_match);
 static struct i2c_driver edt_ft5x06_ts_driver = {
 	.driver = {
 		.name = "edt_ft5x06",
+		.dev_groups = edt_ft5x06_groups,
 		.of_match_table = edt_ft5x06_of_match,
 		.pm = pm_sleep_ptr(&edt_ft5x06_ts_pm_ops),
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c
index a1af3de9f310..365765d40e62 100644
--- a/drivers/input/touchscreen/elants_i2c.c
+++ b/drivers/input/touchscreen/elants_i2c.c
@@ -1299,7 +1299,7 @@ static ELANTS_VERSION_ATTR(solution_version);
 static ELANTS_VERSION_ATTR(bc_version);
 static ELANTS_VERSION_ATTR(iap_version);
 
-static struct attribute *elants_attributes[] = {
+static struct attribute *elants_i2c_attrs[] = {
 	&dev_attr_calibrate.attr,
 	&dev_attr_update_fw.attr,
 	&dev_attr_iap_mode.attr,
@@ -1313,10 +1313,7 @@ static struct attribute *elants_attributes[] = {
 	&elants_ver_attr_iap_version.dattr.attr,
 	NULL
 };
-
-static const struct attribute_group elants_attribute_group = {
-	.attrs = elants_attributes,
-};
+ATTRIBUTE_GROUPS(elants_i2c);
 
 static int elants_i2c_power_on(struct elants_data *ts)
 {
@@ -1552,13 +1549,6 @@ static int elants_i2c_probe(struct i2c_client *client)
 		return error;
 	}
 
-	error = devm_device_add_group(&client->dev, &elants_attribute_group);
-	if (error) {
-		dev_err(&client->dev, "failed to create sysfs attributes: %d\n",
-			error);
-		return error;
-	}
-
 	return 0;
 }
 
@@ -1667,6 +1657,7 @@ static struct i2c_driver elants_i2c_driver = {
 	.id_table = elants_i2c_id,
 	.driver = {
 		.name = DEVICE_NAME,
+		.dev_groups = elants_i2c_groups,
 		.pm = pm_sleep_ptr(&elants_i2c_pm_ops),
 		.acpi_match_table = ACPI_PTR(elants_acpi_id),
 		.of_match_table = of_match_ptr(elants_of_match),
diff --git a/drivers/input/touchscreen/exc3000.c b/drivers/input/touchscreen/exc3000.c
index 4c0d99aae9e0..a4030cc9ff60 100644
--- a/drivers/input/touchscreen/exc3000.c
+++ b/drivers/input/touchscreen/exc3000.c
@@ -325,16 +325,13 @@ static ssize_t type_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(type);
 
-static struct attribute *sysfs_attrs[] = {
+static struct attribute *exc3000_attrs[] = {
 	&dev_attr_fw_version.attr,
 	&dev_attr_model.attr,
 	&dev_attr_type.attr,
 	NULL
 };
-
-static struct attribute_group exc3000_attribute_group = {
-	.attrs = sysfs_attrs
-};
+ATTRIBUTE_GROUPS(exc3000);
 
 static int exc3000_probe(struct i2c_client *client)
 {
@@ -437,10 +434,6 @@ static int exc3000_probe(struct i2c_client *client)
 
 	i2c_set_clientdata(client, data);
 
-	error = devm_device_add_group(&client->dev, &exc3000_attribute_group);
-	if (error)
-		return error;
-
 	return 0;
 }
 
@@ -473,6 +466,7 @@ MODULE_DEVICE_TABLE(acpi, exc3000_acpi_match);
 static struct i2c_driver exc3000_driver = {
 	.driver = {
 		.name = "exc3000",
+		.dev_groups = exc3000_groups,
 		.of_match_table = of_match_ptr(exc3000_of_match),
 		.acpi_match_table = ACPI_PTR(exc3000_acpi_match),
 	},
diff --git a/drivers/input/touchscreen/hideep.c b/drivers/input/touchscreen/hideep.c
index 404153338df7..0f58258306bf 100644
--- a/drivers/input/touchscreen/hideep.c
+++ b/drivers/input/touchscreen/hideep.c
@@ -954,16 +954,13 @@ static DEVICE_ATTR(version, 0664, hideep_fw_version_show, NULL);
 static DEVICE_ATTR(product_id, 0664, hideep_product_id_show, NULL);
 static DEVICE_ATTR(update_fw, 0664, NULL, hideep_update_fw);
 
-static struct attribute *hideep_ts_sysfs_entries[] = {
+static struct attribute *hideep_ts_attrs[] = {
 	&dev_attr_version.attr,
 	&dev_attr_product_id.attr,
 	&dev_attr_update_fw.attr,
 	NULL,
 };
-
-static const struct attribute_group hideep_ts_attr_group = {
-	.attrs = hideep_ts_sysfs_entries,
-};
+ATTRIBUTE_GROUPS(hideep_ts);
 
 static void hideep_set_work_mode(struct hideep_ts *ts)
 {
@@ -1096,13 +1093,6 @@ static int hideep_probe(struct i2c_client *client)
 		return error;
 	}
 
-	error = devm_device_add_group(&client->dev, &hideep_ts_attr_group);
-	if (error) {
-		dev_err(&client->dev,
-			"failed to add sysfs attributes: %d\n", error);
-		return error;
-	}
-
 	return 0;
 }
 
@@ -1131,6 +1121,7 @@ MODULE_DEVICE_TABLE(of, hideep_match_table);
 static struct i2c_driver hideep_driver = {
 	.driver = {
 		.name = HIDEEP_I2C_NAME,
+		.dev_groups = hideep_ts_groups,
 		.of_match_table = of_match_ptr(hideep_match_table),
 		.acpi_match_table = ACPI_PTR(hideep_acpi_id),
 		.pm = pm_sleep_ptr(&hideep_pm_ops),
diff --git a/drivers/input/touchscreen/hycon-hy46xx.c b/drivers/input/touchscreen/hycon-hy46xx.c
index 2450cfa14de9..d0f257989fd6 100644
--- a/drivers/input/touchscreen/hycon-hy46xx.c
+++ b/drivers/input/touchscreen/hycon-hy46xx.c
@@ -274,10 +274,7 @@ static struct attribute *hycon_hy46xx_attrs[] = {
 	&hycon_hy46xx_attr_bootloader_version.dattr.attr,
 	NULL
 };
-
-static const struct attribute_group hycon_hy46xx_attr_group = {
-	.attrs = hycon_hy46xx_attrs,
-};
+ATTRIBUTE_GROUPS(hycon_hy46xx);
 
 static void hycon_hy46xx_get_defaults(struct device *dev, struct hycon_hy46xx_data *tsdata)
 {
@@ -535,10 +532,6 @@ static int hycon_hy46xx_probe(struct i2c_client *client)
 		return error;
 	}
 
-	error = devm_device_add_group(&client->dev, &hycon_hy46xx_attr_group);
-	if (error)
-		return error;
-
 	error = input_register_device(input);
 	if (error)
 		return error;
@@ -576,6 +569,7 @@ MODULE_DEVICE_TABLE(of, hycon_hy46xx_of_match);
 static struct i2c_driver hycon_hy46xx_driver = {
 	.driver = {
 		.name = "hycon_hy46xx",
+		.dev_groups = hycon_hy46xx_groups,
 		.of_match_table = hycon_hy46xx_of_match,
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
 	},
diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c
index ad6828e4f2e2..31ffdc2a93f3 100644
--- a/drivers/input/touchscreen/ili210x.c
+++ b/drivers/input/touchscreen/ili210x.c
@@ -876,7 +876,7 @@ exit:
 
 static DEVICE_ATTR(firmware_update, 0200, NULL, ili210x_firmware_update_store);
 
-static struct attribute *ili210x_attributes[] = {
+static struct attribute *ili210x_attrs[] = {
 	&dev_attr_calibrate.attr,
 	&dev_attr_firmware_update.attr,
 	&dev_attr_firmware_version.attr,
@@ -904,10 +904,11 @@ static umode_t ili210x_attributes_visible(struct kobject *kobj,
 	return attr->mode;
 }
 
-static const struct attribute_group ili210x_attr_group = {
-	.attrs = ili210x_attributes,
+static const struct attribute_group ili210x_group = {
+	.attrs = ili210x_attrs,
 	.is_visible = ili210x_attributes_visible,
 };
+__ATTRIBUTE_GROUPS(ili210x);
 
 static void ili210x_power_down(void *data)
 {
@@ -1013,13 +1014,6 @@ static int ili210x_i2c_probe(struct i2c_client *client)
 	if (error)
 		return error;
 
-	error = devm_device_add_group(dev, &ili210x_attr_group);
-	if (error) {
-		dev_err(dev, "Unable to create sysfs attributes, err: %d\n",
-			error);
-		return error;
-	}
-
 	error = input_register_device(priv->input);
 	if (error) {
 		dev_err(dev, "Cannot register input device, err: %d\n", error);
@@ -1050,6 +1044,7 @@ MODULE_DEVICE_TABLE(of, ili210x_dt_ids);
 static struct i2c_driver ili210x_ts_driver = {
 	.driver = {
 		.name = "ili210x_i2c",
+		.dev_groups = ili210x_groups,
 		.of_match_table = ili210x_dt_ids,
 	},
 	.id_table = ili210x_i2c_id,
diff --git a/drivers/input/touchscreen/ilitek_ts_i2c.c b/drivers/input/touchscreen/ilitek_ts_i2c.c
index 2f872e95fbba..90c4934e750a 100644
--- a/drivers/input/touchscreen/ilitek_ts_i2c.c
+++ b/drivers/input/touchscreen/ilitek_ts_i2c.c
@@ -537,10 +537,7 @@ static struct attribute *ilitek_sysfs_attrs[] = {
 	&dev_attr_product_id.attr,
 	NULL
 };
-
-static struct attribute_group ilitek_attrs_group = {
-	.attrs = ilitek_sysfs_attrs,
-};
+ATTRIBUTE_GROUPS(ilitek_sysfs);
 
 static int ilitek_ts_i2c_probe(struct i2c_client *client)
 {
@@ -595,12 +592,6 @@ static int ilitek_ts_i2c_probe(struct i2c_client *client)
 		return error;
 	}
 
-	error = devm_device_add_group(dev, &ilitek_attrs_group);
-	if (error) {
-		dev_err(dev, "sysfs create group failed: %d\n", error);
-		return error;
-	}
-
 	return 0;
 }
 
@@ -675,6 +666,7 @@ MODULE_DEVICE_TABLE(of, ilitek_ts_i2c_match);
 static struct i2c_driver ilitek_ts_i2c_driver = {
 	.driver = {
 		.name = ILITEK_TS_NAME,
+		.dev_groups = ilitek_sysfs_groups,
 		.pm = pm_sleep_ptr(&ilitek_pm_ops),
 		.of_match_table = of_match_ptr(ilitek_ts_i2c_match),
 		.acpi_match_table = ACPI_PTR(ilitekts_acpi_id),
diff --git a/drivers/input/touchscreen/iqs5xx.c b/drivers/input/touchscreen/iqs5xx.c
index b4768b66eb10..a3f4fb85bee5 100644
--- a/drivers/input/touchscreen/iqs5xx.c
+++ b/drivers/input/touchscreen/iqs5xx.c
@@ -974,10 +974,11 @@ static umode_t iqs5xx_attr_is_visible(struct kobject *kobj,
 	return attr->mode;
 }
 
-static const struct attribute_group iqs5xx_attr_group = {
+static const struct attribute_group iqs5xx_group = {
 	.is_visible = iqs5xx_attr_is_visible,
 	.attrs = iqs5xx_attrs,
 };
+__ATTRIBUTE_GROUPS(iqs5xx);
 
 static int iqs5xx_suspend(struct device *dev)
 {
@@ -1053,12 +1054,6 @@ static int iqs5xx_probe(struct i2c_client *client)
 		return error;
 	}
 
-	error = devm_device_add_group(&client->dev, &iqs5xx_attr_group);
-	if (error) {
-		dev_err(&client->dev, "Failed to add attributes: %d\n", error);
-		return error;
-	}
-
 	if (iqs5xx->input) {
 		error = input_register_device(iqs5xx->input);
 		if (error)
@@ -1089,6 +1084,7 @@ MODULE_DEVICE_TABLE(of, iqs5xx_of_match);
 static struct i2c_driver iqs5xx_i2c_driver = {
 	.driver = {
 		.name = "iqs5xx",
+		.dev_groups = iqs5xx_groups,
 		.of_match_table = iqs5xx_of_match,
 		.pm = pm_sleep_ptr(&iqs5xx_pm),
 	},
diff --git a/drivers/input/touchscreen/mainstone-wm97xx.c b/drivers/input/touchscreen/mainstone-wm97xx.c
index 85b95ed461e7..bfbebe245040 100644
--- a/drivers/input/touchscreen/mainstone-wm97xx.c
+++ b/drivers/input/touchscreen/mainstone-wm97xx.c
@@ -252,18 +252,16 @@ static int mainstone_wm97xx_probe(struct platform_device *pdev)
 	return wm97xx_register_mach_ops(wm, &mainstone_mach_ops);
 }
 
-static int mainstone_wm97xx_remove(struct platform_device *pdev)
+static void mainstone_wm97xx_remove(struct platform_device *pdev)
 {
 	struct wm97xx *wm = platform_get_drvdata(pdev);
 
 	wm97xx_unregister_mach_ops(wm);
-
-	return 0;
 }
 
 static struct platform_driver mainstone_wm97xx_driver = {
 	.probe = mainstone_wm97xx_probe,
-	.remove = mainstone_wm97xx_remove,
+	.remove_new = mainstone_wm97xx_remove,
 	.driver = {
 		.name = "wm97xx-touch",
 	},
diff --git a/drivers/input/touchscreen/mc13783_ts.c b/drivers/input/touchscreen/mc13783_ts.c
index ae0d978c83bf..cbcd6e34efb7 100644
--- a/drivers/input/touchscreen/mc13783_ts.c
+++ b/drivers/input/touchscreen/mc13783_ts.c
@@ -217,18 +217,16 @@ err_free_mem:
 	return ret;
 }
 
-static int mc13783_ts_remove(struct platform_device *pdev)
+static void mc13783_ts_remove(struct platform_device *pdev)
 {
 	struct mc13783_ts_priv *priv = platform_get_drvdata(pdev);
 
 	input_unregister_device(priv->idev);
 	kfree(priv);
-
-	return 0;
 }
 
 static struct platform_driver mc13783_ts_driver = {
-	.remove = mc13783_ts_remove,
+	.remove_new = mc13783_ts_remove,
 	.driver = {
 		.name = MC13783_TS_NAME,
 	},
diff --git a/drivers/input/touchscreen/melfas_mip4.c b/drivers/input/touchscreen/melfas_mip4.c
index 2ac4483fbc25..aa325486f618 100644
--- a/drivers/input/touchscreen/melfas_mip4.c
+++ b/drivers/input/touchscreen/melfas_mip4.c
@@ -1419,10 +1419,7 @@ static struct attribute *mip4_attrs[] = {
 	&dev_attr_update_fw.attr,
 	NULL,
 };
-
-static const struct attribute_group mip4_attr_group = {
-	.attrs = mip4_attrs,
-};
+ATTRIBUTE_GROUPS(mip4);
 
 static int mip4_probe(struct i2c_client *client)
 {
@@ -1514,13 +1511,6 @@ static int mip4_probe(struct i2c_client *client)
 		return error;
 	}
 
-	error = devm_device_add_group(&client->dev, &mip4_attr_group);
-	if (error) {
-		dev_err(&client->dev,
-			"Failed to create sysfs attribute group: %d\n", error);
-		return error;
-	}
-
 	return 0;
 }
 
@@ -1589,6 +1579,7 @@ static struct i2c_driver mip4_driver = {
 	.probe = mip4_probe,
 	.driver = {
 		.name = MIP4_DEVICE_NAME,
+		.dev_groups = mip4_groups,
 		.of_match_table = of_match_ptr(mip4_of_match),
 		.acpi_match_table = ACPI_PTR(mip4_acpi_match),
 		.pm = pm_sleep_ptr(&mip4_pm_ops),
diff --git a/drivers/input/touchscreen/pcap_ts.c b/drivers/input/touchscreen/pcap_ts.c
index b2da0194e02a..821245019fea 100644
--- a/drivers/input/touchscreen/pcap_ts.c
+++ b/drivers/input/touchscreen/pcap_ts.c
@@ -197,7 +197,7 @@ fail:
 	return err;
 }
 
-static int pcap_ts_remove(struct platform_device *pdev)
+static void pcap_ts_remove(struct platform_device *pdev)
 {
 	struct pcap_ts *pcap_ts = platform_get_drvdata(pdev);
 
@@ -207,8 +207,6 @@ static int pcap_ts_remove(struct platform_device *pdev)
 	input_unregister_device(pcap_ts->input);
 
 	kfree(pcap_ts);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM
@@ -240,7 +238,7 @@ static const struct dev_pm_ops pcap_ts_pm_ops = {
 
 static struct platform_driver pcap_ts_driver = {
 	.probe = pcap_ts_probe,
-	.remove = pcap_ts_remove,
+	.remove_new = pcap_ts_remove,
 	.driver = {
 		.name = "pcap-ts",
 		.pm = PCAP_TS_PM_OPS,
diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c b/drivers/input/touchscreen/raydium_i2c_ts.c
index 78dd3059d585..13c500e776f6 100644
--- a/drivers/input/touchscreen/raydium_i2c_ts.c
+++ b/drivers/input/touchscreen/raydium_i2c_ts.c
@@ -1004,7 +1004,7 @@ static DEVICE_ATTR(boot_mode, S_IRUGO, raydium_i2c_boot_mode_show, NULL);
 static DEVICE_ATTR(update_fw, S_IWUSR, NULL, raydium_i2c_update_fw_store);
 static DEVICE_ATTR(calibrate, S_IWUSR, NULL, raydium_i2c_calibrate_store);
 
-static struct attribute *raydium_i2c_attributes[] = {
+static struct attribute *raydium_i2c_attrs[] = {
 	&dev_attr_update_fw.attr,
 	&dev_attr_boot_mode.attr,
 	&dev_attr_fw_version.attr,
@@ -1012,10 +1012,7 @@ static struct attribute *raydium_i2c_attributes[] = {
 	&dev_attr_calibrate.attr,
 	NULL
 };
-
-static const struct attribute_group raydium_i2c_attribute_group = {
-	.attrs = raydium_i2c_attributes,
-};
+ATTRIBUTE_GROUPS(raydium_i2c);
 
 static int raydium_i2c_power_on(struct raydium_data *ts)
 {
@@ -1174,14 +1171,6 @@ static int raydium_i2c_probe(struct i2c_client *client)
 		return error;
 	}
 
-	error = devm_device_add_group(&client->dev,
-				      &raydium_i2c_attribute_group);
-	if (error) {
-		dev_err(&client->dev, "failed to create sysfs attributes: %d\n",
-			error);
-		return error;
-	}
-
 	return 0;
 }
 
@@ -1265,6 +1254,7 @@ static struct i2c_driver raydium_i2c_driver = {
 	.id_table = raydium_i2c_id,
 	.driver = {
 		.name = "raydium_ts",
+		.dev_groups = raydium_i2c_groups,
 		.pm = pm_sleep_ptr(&raydium_i2c_pm_ops),
 		.acpi_match_table = ACPI_PTR(raydium_acpi_id),
 		.of_match_table = of_match_ptr(raydium_of_match),
diff --git a/drivers/input/touchscreen/rohm_bu21023.c b/drivers/input/touchscreen/rohm_bu21023.c
index 240424f06b98..4493ad0c9322 100644
--- a/drivers/input/touchscreen/rohm_bu21023.c
+++ b/drivers/input/touchscreen/rohm_bu21023.c
@@ -854,10 +854,7 @@ static struct attribute *rohm_ts_attrs[] = {
 	&dev_attr_inv_y.attr,
 	NULL,
 };
-
-static const struct attribute_group rohm_ts_attr_group = {
-	.attrs = rohm_ts_attrs,
-};
+ATTRIBUTE_GROUPS(rohm_ts);
 
 static int rohm_ts_device_init(struct i2c_client *client, u8 setup2)
 {
@@ -1164,12 +1161,6 @@ static int rohm_bu21023_i2c_probe(struct i2c_client *client)
 		return error;
 	}
 
-	error = devm_device_add_group(dev, &rohm_ts_attr_group);
-	if (error) {
-		dev_err(dev, "failed to create sysfs group: %d\n", error);
-		return error;
-	}
-
 	return error;
 }
 
@@ -1182,6 +1173,7 @@ MODULE_DEVICE_TABLE(i2c, rohm_bu21023_i2c_id);
 static struct i2c_driver rohm_bu21023_i2c_driver = {
 	.driver = {
 		.name = BU21023_NAME,
+		.dev_groups = rohm_ts_groups,
 	},
 	.probe = rohm_bu21023_i2c_probe,
 	.id_table = rohm_bu21023_i2c_id,
diff --git a/drivers/input/touchscreen/s6sy761.c b/drivers/input/touchscreen/s6sy761.c
index 998d99d18911..149cc2c4925e 100644
--- a/drivers/input/touchscreen/s6sy761.c
+++ b/drivers/input/touchscreen/s6sy761.c
@@ -286,10 +286,7 @@ static struct attribute *s6sy761_sysfs_attrs[] = {
 	&dev_attr_devid.attr,
 	NULL
 };
-
-static struct attribute_group s6sy761_attribute_group = {
-	.attrs = s6sy761_sysfs_attrs
-};
+ATTRIBUTE_GROUPS(s6sy761_sysfs);
 
 static int s6sy761_power_on(struct s6sy761_data *sdata)
 {
@@ -465,10 +462,6 @@ static int s6sy761_probe(struct i2c_client *client)
 	if (err)
 		return err;
 
-	err = devm_device_add_group(&client->dev, &s6sy761_attribute_group);
-	if (err)
-		return err;
-
 	pm_runtime_enable(&client->dev);
 
 	return 0;
@@ -535,6 +528,7 @@ MODULE_DEVICE_TABLE(i2c, s6sy761_id);
 static struct i2c_driver s6sy761_driver = {
 	.driver = {
 		.name = S6SY761_DEV_NAME,
+		.dev_groups = s6sy761_sysfs_groups,
 		.of_match_table = of_match_ptr(s6sy761_of_match),
 		.pm = pm_ptr(&s6sy761_pm_ops),
 	},
diff --git a/drivers/input/touchscreen/stmfts.c b/drivers/input/touchscreen/stmfts.c
index 56e371fd88fa..85010fa07908 100644
--- a/drivers/input/touchscreen/stmfts.c
+++ b/drivers/input/touchscreen/stmfts.c
@@ -517,10 +517,7 @@ static struct attribute *stmfts_sysfs_attrs[] = {
 	&dev_attr_hover_enable.attr,
 	NULL
 };
-
-static struct attribute_group stmfts_attribute_group = {
-	.attrs = stmfts_sysfs_attrs
-};
+ATTRIBUTE_GROUPS(stmfts_sysfs);
 
 static int stmfts_power_on(struct stmfts_data *sdata)
 {
@@ -727,10 +724,6 @@ static int stmfts_probe(struct i2c_client *client)
 		}
 	}
 
-	err = devm_device_add_group(&client->dev, &stmfts_attribute_group);
-	if (err)
-		return err;
-
 	pm_runtime_enable(&client->dev);
 	device_enable_async_suspend(&client->dev);
 
@@ -804,6 +797,7 @@ MODULE_DEVICE_TABLE(i2c, stmfts_id);
 static struct i2c_driver stmfts_driver = {
 	.driver = {
 		.name = STMFTS_DEV_NAME,
+		.dev_groups = stmfts_sysfs_groups,
 		.of_match_table = of_match_ptr(stmfts_of_match),
 		.pm = pm_ptr(&stmfts_pm_ops),
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
diff --git a/drivers/input/touchscreen/stmpe-ts.c b/drivers/input/touchscreen/stmpe-ts.c
index 25c45c3a3561..b204fdb2d22c 100644
--- a/drivers/input/touchscreen/stmpe-ts.c
+++ b/drivers/input/touchscreen/stmpe-ts.c
@@ -350,13 +350,11 @@ static int stmpe_input_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int stmpe_ts_remove(struct platform_device *pdev)
+static void stmpe_ts_remove(struct platform_device *pdev)
 {
 	struct stmpe_touch *ts = platform_get_drvdata(pdev);
 
 	stmpe_disable(ts->stmpe, STMPE_BLOCK_TOUCHSCREEN);
-
-	return 0;
 }
 
 static struct platform_driver stmpe_ts_driver = {
@@ -364,7 +362,7 @@ static struct platform_driver stmpe_ts_driver = {
 		.name = STMPE_TS_NAME,
 	},
 	.probe = stmpe_input_probe,
-	.remove = stmpe_ts_remove,
+	.remove_new = stmpe_ts_remove,
 };
 module_platform_driver(stmpe_ts_driver);
diff --git a/drivers/input/touchscreen/sun4i-ts.c b/drivers/input/touchscreen/sun4i-ts.c
index bb3c6072fc82..92b2b840b4b7 100644
--- a/drivers/input/touchscreen/sun4i-ts.c
+++ b/drivers/input/touchscreen/sun4i-ts.c
@@ -375,7 +375,7 @@ static int sun4i_ts_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int sun4i_ts_remove(struct platform_device *pdev)
+static void sun4i_ts_remove(struct platform_device *pdev)
 {
 	struct sun4i_ts_data *ts = platform_get_drvdata(pdev);
 
@@ -385,8 +385,6 @@ static int sun4i_ts_remove(struct platform_device *pdev)
 
 	/* Deactivate all IRQs */
 	writel(0, ts->base + TP_INT_FIFOC);
-
-	return 0;
 }
 
 static const struct of_device_id sun4i_ts_of_match[] = {
@@ -403,7 +401,7 @@ static struct platform_driver sun4i_ts_driver = {
 		.of_match_table = sun4i_ts_of_match,
 	},
 	.probe = sun4i_ts_probe,
-	.remove = sun4i_ts_remove,
+	.remove_new = sun4i_ts_remove,
 };
 
 module_platform_driver(sun4i_ts_driver);
diff --git a/drivers/input/touchscreen/ti_am335x_tsc.c b/drivers/input/touchscreen/ti_am335x_tsc.c
index 9aa4e35fb4f5..34324f8512ac 100644
--- a/drivers/input/touchscreen/ti_am335x_tsc.c
+++ b/drivers/input/touchscreen/ti_am335x_tsc.c
@@ -491,7 +491,7 @@ err_free_mem:
 	return err;
 }
 
-static int titsc_remove(struct platform_device *pdev)
+static void titsc_remove(struct platform_device *pdev)
 {
 	struct titsc *ts_dev = platform_get_drvdata(pdev);
 	u32 steps;
@@ -508,7 +508,6 @@ static int titsc_remove(struct platform_device *pdev)
 
 	input_unregister_device(ts_dev->input);
 	kfree(ts_dev);
-	return 0;
 }
 
 static int titsc_suspend(struct device *dev)
@@ -552,7 +551,7 @@ MODULE_DEVICE_TABLE(of, ti_tsc_dt_ids);
 
 static struct platform_driver ti_tsc_driver = {
 	.probe = titsc_probe,
-	.remove = titsc_remove,
+	.remove_new = titsc_remove,
 	.driver = {
 		.name = "TI-am335x-tsc",
 		.pm = pm_sleep_ptr(&titsc_pm_ops),
diff --git a/drivers/input/touchscreen/tsc2004.c b/drivers/input/touchscreen/tsc2004.c
index b5e904c5b7c4..89c5248f66f6 100644
--- a/drivers/input/touchscreen/tsc2004.c
+++ b/drivers/input/touchscreen/tsc2004.c
@@ -63,9 +63,10 @@ MODULE_DEVICE_TABLE(of, tsc2004_of_match);
 
 static struct i2c_driver tsc2004_driver = {
 	.driver = {
-		.name = "tsc2004",
-		.of_match_table = of_match_ptr(tsc2004_of_match),
-		.pm = pm_sleep_ptr(&tsc200x_pm_ops),
+		.name = "tsc2004",
+		.dev_groups = tsc200x_groups,
+		.of_match_table = of_match_ptr(tsc2004_of_match),
+		.pm = pm_sleep_ptr(&tsc200x_pm_ops),
 	},
 	.id_table = tsc2004_idtable,
 	.probe = tsc2004_probe,
diff --git a/drivers/input/touchscreen/tsc2005.c b/drivers/input/touchscreen/tsc2005.c
index b6dfbcfc8c19..1b40ce0ca1b9 100644
--- a/drivers/input/touchscreen/tsc2005.c
+++ b/drivers/input/touchscreen/tsc2005.c
@@ -79,9 +79,10 @@ MODULE_DEVICE_TABLE(of, tsc2005_of_match);
 
 static struct spi_driver tsc2005_driver = {
 	.driver = {
-		.name = "tsc2005",
-		.of_match_table = of_match_ptr(tsc2005_of_match),
-		.pm = pm_sleep_ptr(&tsc200x_pm_ops),
+		.name = "tsc2005",
+		.dev_groups = tsc200x_groups,
+		.of_match_table = of_match_ptr(tsc2005_of_match),
+		.pm = pm_sleep_ptr(&tsc200x_pm_ops),
 	},
 	.probe = tsc2005_probe,
 	.remove = tsc2005_remove,
diff --git a/drivers/input/touchscreen/tsc200x-core.c b/drivers/input/touchscreen/tsc200x-core.c
index b799f26fcf8f..a4c0e9db9bb9 100644
--- a/drivers/input/touchscreen/tsc200x-core.c
+++ b/drivers/input/touchscreen/tsc200x-core.c
@@ -356,6 +356,12 @@ static const struct attribute_group tsc200x_attr_group = {
 	.attrs = tsc200x_attrs,
 };
 
+const struct attribute_group *tsc200x_groups[] = {
+	&tsc200x_attr_group,
+	NULL
+};
+EXPORT_SYMBOL_GPL(tsc200x_groups);
+
 static void tsc200x_esd_work(struct work_struct *work)
 {
 	struct tsc200x *ts = container_of(work, struct tsc200x, esd_work.work);
@@ -553,25 +559,17 @@ int tsc200x_probe(struct device *dev, int irq, const struct input_id *tsc_id,
 		return error;
 
 	dev_set_drvdata(dev, ts);
-	error = sysfs_create_group(&dev->kobj, &tsc200x_attr_group);
-	if (error) {
-		dev_err(dev,
-			"Failed to create sysfs attributes, err: %d\n", error);
-		goto disable_regulator;
-	}
 
 	error = input_register_device(ts->idev);
 	if (error) {
 		dev_err(dev,
 			"Failed to register input device, err: %d\n", error);
-		goto err_remove_sysfs;
+		goto disable_regulator;
 	}
 
 	irq_set_irq_wake(irq, 1);
 	return 0;
 
-err_remove_sysfs:
-	sysfs_remove_group(&dev->kobj, &tsc200x_attr_group);
 disable_regulator:
 	regulator_disable(ts->vio);
 	return error;
@@ -582,8 +580,6 @@ void tsc200x_remove(struct device *dev)
 {
 	struct tsc200x *ts = dev_get_drvdata(dev);
 
-	sysfs_remove_group(&dev->kobj, &tsc200x_attr_group);
-
 	regulator_disable(ts->vio);
 }
 EXPORT_SYMBOL_GPL(tsc200x_remove);
diff --git a/drivers/input/touchscreen/tsc200x-core.h b/drivers/input/touchscreen/tsc200x-core.h
index 4ded34425b21..37de91efd78e 100644
--- a/drivers/input/touchscreen/tsc200x-core.h
+++ b/drivers/input/touchscreen/tsc200x-core.h
@@ -70,6 +70,7 @@ extern const struct regmap_config
tsc200x_regmap_config; extern const struct dev_pm_ops tsc200x_pm_ops; +extern const struct attribute_group *tsc200x_groups[]; int tsc200x_probe(struct device *dev, int irq, const struct input_id *tsc_id, struct regmap *regmap, diff --git a/drivers/input/touchscreen/wdt87xx_i2c.c b/drivers/input/touchscreen/wdt87xx_i2c.c index cbc4750c53f9..128341a6696b 100644 --- a/drivers/input/touchscreen/wdt87xx_i2c.c +++ b/drivers/input/touchscreen/wdt87xx_i2c.c @@ -944,10 +944,7 @@ static struct attribute *wdt87xx_attrs[] = { &dev_attr_update_fw.attr, NULL }; - -static const struct attribute_group wdt87xx_attr_group = { - .attrs = wdt87xx_attrs, -}; +ATTRIBUTE_GROUPS(wdt87xx); static void wdt87xx_report_contact(struct input_dev *input, struct wdt87xx_sys_param *param, @@ -1104,12 +1101,6 @@ static int wdt87xx_ts_probe(struct i2c_client *client) return error; } - error = devm_device_add_group(&client->dev, &wdt87xx_attr_group); - if (error) { - dev_err(&client->dev, "create sysfs failed: %d\n", error); - return error; - } - return 0; } @@ -1172,8 +1163,9 @@ static struct i2c_driver wdt87xx_driver = { .probe = wdt87xx_ts_probe, .id_table = wdt87xx_dev_id, .driver = { - .name = WDT87XX_NAME, - .pm = pm_sleep_ptr(&wdt87xx_pm_ops), + .name = WDT87XX_NAME, + .dev_groups = wdt87xx_groups, + .pm = pm_sleep_ptr(&wdt87xx_pm_ops), .acpi_match_table = ACPI_PTR(wdt87xx_acpi_id), }, }; diff --git a/drivers/input/touchscreen/wm831x-ts.c b/drivers/input/touchscreen/wm831x-ts.c index 319f57fb9af5..9cee26b63341 100644 --- a/drivers/input/touchscreen/wm831x-ts.c +++ b/drivers/input/touchscreen/wm831x-ts.c @@ -374,14 +374,12 @@ err_alloc: return error; } -static int wm831x_ts_remove(struct platform_device *pdev) +static void wm831x_ts_remove(struct platform_device *pdev) { struct wm831x_ts *wm831x_ts = platform_get_drvdata(pdev); free_irq(wm831x_ts->pd_irq, wm831x_ts); free_irq(wm831x_ts->data_irq, wm831x_ts); - - return 0; } static struct platform_driver wm831x_ts_driver = { @@ -389,7 +387,7 @@ static struct platform_driver wm831x_ts_driver = { .name = "wm831x-touch", }, .probe = wm831x_ts_probe, - .remove = wm831x_ts_remove, + .remove_new = wm831x_ts_remove, }; module_platform_driver(wm831x_ts_driver); diff --git a/drivers/input/touchscreen/wm97xx-core.c b/drivers/input/touchscreen/wm97xx-core.c index ac3b3dd59488..f01f6cc9b59f 100644 --- a/drivers/input/touchscreen/wm97xx-core.c +++ b/drivers/input/touchscreen/wm97xx-core.c @@ -756,11 +756,9 @@ batt_err: return ret; } -static int wm97xx_mfd_remove(struct platform_device *pdev) +static void wm97xx_mfd_remove(struct platform_device *pdev) { wm97xx_remove(&pdev->dev); - - return 0; } static int wm97xx_suspend(struct device *dev) @@ -878,7 +876,7 @@ static struct platform_driver wm97xx_mfd_driver = { .pm = pm_sleep_ptr(&wm97xx_pm_ops), }, .probe = wm97xx_mfd_probe, - .remove = wm97xx_mfd_remove, + .remove_new = wm97xx_mfd_remove, }; static int __init wm97xx_init(void) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index ee9e2a2edbf5..7673bb82945b 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -95,7 +95,7 @@ config IOMMU_DEBUGFS choice prompt "IOMMU default domain type" depends on IOMMU_API - default IOMMU_DEFAULT_DMA_LAZY if X86 + default IOMMU_DEFAULT_DMA_LAZY if X86 || S390 default IOMMU_DEFAULT_DMA_STRICT help Choose the type of IOMMU domain used to manage DMA API usage by @@ -150,7 +150,7 @@ config OF_IOMMU # IOMMU-agnostic DMA-mapping layer config IOMMU_DMA - def_bool ARM64 || X86 + def_bool ARM64 || X86 || S390 select DMA_OPS select 
IOMMU_API select IOMMU_IOVA @@ -240,17 +240,6 @@ config SUN50I_IOMMU help Support for the IOMMU introduced in the Allwinner H6 SoCs. -config TEGRA_IOMMU_GART - bool "Tegra GART IOMMU Support" - depends on ARCH_TEGRA_2x_SOC - depends on TEGRA_MC - select IOMMU_API - help - Enables support for remapping discontiguous physical memory - shared with the operating system into contiguous I/O virtual - space through the GART (Graphics Address Relocation Table) - hardware included on Tegra SoCs. - config TEGRA_IOMMU_SMMU bool "NVIDIA Tegra SMMU Support" depends on ARCH_TEGRA diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 769e43d780ce..95ad9dbfbda0 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -20,7 +20,6 @@ obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o obj-$(CONFIG_SUN50I_IOMMU) += sun50i-iommu.o -obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig index 8bd4c3b183ec..443b2c13c37b 100644 --- a/drivers/iommu/amd/Kconfig +++ b/drivers/iommu/amd/Kconfig @@ -23,15 +23,6 @@ config AMD_IOMMU your BIOS for an option to enable it or if you have an IVRS ACPI table. -config AMD_IOMMU_V2 - tristate "AMD IOMMU Version 2 driver" - depends on AMD_IOMMU - select MMU_NOTIFIER - help - This option enables support for the AMD IOMMUv2 features of the IOMMU - hardware. Select this option if you want to use devices that support - the PCI PRI and PASID interface. - config AMD_IOMMU_DEBUGFS bool "Enable AMD IOMMU internals in DebugFS" depends on AMD_IOMMU && IOMMU_DEBUGFS diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile index 773d8aa00283..f454fbb1569e 100644 --- a/drivers/iommu/amd/Makefile +++ b/drivers/iommu/amd/Makefile @@ -1,4 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o -obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index e2857109e966..86be1edd50ee 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -38,9 +38,6 @@ extern int amd_iommu_guest_ir; extern enum io_pgtable_fmt amd_iommu_pgtable; extern int amd_iommu_gpt_level; -/* IOMMUv2 specific functions */ -struct iommu_domain; - bool amd_iommu_v2_supported(void); struct amd_iommu *get_amd_iommu(unsigned int idx); u8 amd_iommu_pc_get_max_banks(unsigned int idx); @@ -51,10 +48,10 @@ int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value); -int amd_iommu_register_ppr_notifier(struct notifier_block *nb); -int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb); -void amd_iommu_domain_direct_map(struct iommu_domain *dom); -int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); +/* Device capabilities */ +int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev); +void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev); + int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address); void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); void amd_iommu_domain_update(struct protection_domain *domain); @@ -87,9 +84,25 @@ static 
inline bool is_rd890_iommu(struct pci_dev *pdev) (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); } -static inline bool iommu_feature(struct amd_iommu *iommu, u64 mask) +static inline bool check_feature(u64 mask) +{ + return (amd_iommu_efr & mask); +} + +static inline bool check_feature2(u64 mask) +{ + return (amd_iommu_efr2 & mask); +} + +static inline int check_feature_gpt_level(void) +{ + return ((amd_iommu_efr >> FEATURE_GATS_SHIFT) & FEATURE_GATS_MASK); +} + +static inline bool amd_iommu_gt_ppr_supported(void) { - return !!(iommu->features & mask); + return (check_feature(FEATURE_GT) && + check_feature(FEATURE_PPR)); } static inline u64 iommu_virt_to_phys(void *vaddr) @@ -105,7 +118,6 @@ static inline void *iommu_phys_to_virt(unsigned long paddr) static inline void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) { - atomic64_set(&domain->iop.pt_root, root); domain->iop.root = (u64 *)(root & PAGE_MASK); domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */ } @@ -146,8 +158,5 @@ void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); struct dev_table_entry *get_dev_table(struct amd_iommu *iommu); -extern u64 amd_iommu_efr; -extern u64 amd_iommu_efr2; - extern bool amd_iommu_snp_en; #endif diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index dec4e5c2b66b..90b7d7950a9e 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -462,6 +462,10 @@ #define PD_IOMMUV2_MASK BIT(3) /* domain has gcr3 table */ #define PD_GIOV_MASK BIT(4) /* domain enable GIOV support */ +/* Timeout stuff */ +#define LOOP_TIMEOUT 100000 +#define MMIO_STATUS_TIMEOUT 2000000 + extern bool amd_iommu_dump; #define DUMP_printk(format, arg...) \ do { \ @@ -516,19 +520,6 @@ extern struct kmem_cache *amd_iommu_irq_cache; #define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT) #define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL) -/* - * This struct is used to pass information about - * incoming PPR faults around. 
- */ -struct amd_iommu_fault { - u64 address; /* IO virtual address of the fault*/ - u32 pasid; /* Address space identifier */ - u32 sbdf; /* Originating PCI device id */ - u16 tag; /* PPR tag */ - u16 flags; /* Fault flags */ - -}; - struct amd_iommu; struct iommu_domain; @@ -555,7 +546,6 @@ struct amd_io_pgtable { struct io_pgtable iop; int mode; u64 *root; - atomic64_t pt_root; /* pgtable root and pgtable mode */ u64 *pgd; /* v2 pgtable pgd pointer */ }; @@ -688,9 +678,6 @@ struct amd_iommu { /* Extended features 2 */ u64 features2; - /* IOMMUv2 */ - bool is_iommu_v2; - /* PCI device id of the IOMMU device */ u16 devid; @@ -811,6 +798,14 @@ struct devid_map { bool cmd_line; }; +#define AMD_IOMMU_DEVICE_FLAG_ATS_SUP 0x1 /* ATS feature supported */ +#define AMD_IOMMU_DEVICE_FLAG_PRI_SUP 0x2 /* PRI feature supported */ +#define AMD_IOMMU_DEVICE_FLAG_PASID_SUP 0x4 /* PASID context supported */ +/* Device may request execution on memory pages */ +#define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP 0x8 +/* Device may request super-user privileges */ +#define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP 0x10 + /* * This struct contains device specific data for the IOMMU */ @@ -823,13 +818,15 @@ struct iommu_dev_data { struct protection_domain *domain; /* Domain the device is bound to */ struct device *dev; u16 devid; /* PCI Device ID */ - bool iommu_v2; /* Device can make use of IOMMUv2 */ - struct { - bool enabled; - int qdep; - } ats; /* ATS state */ - bool pri_tlp; /* PASID TLB required for + + u32 flags; /* Holds AMD_IOMMU_DEVICE_FLAG_<*> */ + int ats_qdep; + u8 ats_enabled :1; /* ATS state */ + u8 pri_enabled :1; /* PRI state */ + u8 pasid_enabled:1; /* PASID state */ + u8 pri_tlp :1; /* PASID TLB required for PPR completions */ + u8 ppr :1; /* Enable device PPR support */ bool use_vapic; /* Enable device to use vapic mode */ bool defer_attach; @@ -896,16 +893,15 @@ extern unsigned amd_iommu_aperture_order; /* allocation bitmap for domain ids */ extern unsigned long *amd_iommu_pd_alloc_bitmap; -/* Smallest max PASID supported by any IOMMU in the system */ -extern u32 amd_iommu_max_pasid; - -extern bool amd_iommu_v2_present; - extern bool amd_iommu_force_isolation; /* Max levels of glxval supported */ extern int amd_iommu_max_glx_val; +/* Global EFR and EFR2 registers */ +extern u64 amd_iommu_efr; +extern u64 amd_iommu_efr2; + /* * This function flushes all internal caches of * the IOMMU used by this driver. diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 45efb7e5d725..64bcf3df37ee 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -83,8 +83,6 @@ #define ACPI_DEVFLAG_LINT1 0x80 #define ACPI_DEVFLAG_ATSDIS 0x10000000 -#define LOOP_TIMEOUT 2000000 - #define IVRS_GET_SBDF_ID(seg, bus, dev, fn) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \ | ((dev & 0x1f) << 3) | (fn & 0x7)) @@ -187,9 +185,6 @@ static int amd_iommus_present; bool amd_iommu_np_cache __read_mostly; bool amd_iommu_iotlb_sup __read_mostly = true; -u32 amd_iommu_max_pasid __read_mostly = ~0; - -bool amd_iommu_v2_present __read_mostly; static bool amd_iommu_pc_present __read_mostly; bool amdr_ivrs_remap_support __read_mostly; @@ -272,7 +267,7 @@ int amd_iommu_get_num_iommus(void) * Iterate through all the IOMMUs to get common EFR * masks among all IOMMUs and warn if found inconsistency. 
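 *
 * Once the global EFR/EFR2 pair is latched here, feature tests no
 * longer need a specific iommu instance. A minimal sketch using the
 * check_feature() helper this series introduces (the caller shown is
 * hypothetical):
 *
 *	if (check_feature(FEATURE_GT) && check_feature(FEATURE_PPR))
 *		pr_info("GT and PPR supported\n");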
*/ -static void get_global_efr(void) +static __init void get_global_efr(void) { struct amd_iommu *iommu; @@ -304,16 +299,6 @@ static void get_global_efr(void) pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2); } -static bool check_feature_on_all_iommus(u64 mask) -{ - return !!(amd_iommu_efr & mask); -} - -static inline int check_feature_gpt_level(void) -{ - return ((amd_iommu_efr >> FEATURE_GATS_SHIFT) & FEATURE_GATS_MASK); -} - /* * For IVHD type 0x11/0x40, EFR is also available via IVHD. * Default to IVHD EFR since it is available sooner @@ -399,7 +384,7 @@ static void iommu_set_cwwb_range(struct amd_iommu *iommu) u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem); u64 entry = start & PM_ADDR_MASK; - if (!check_feature_on_all_iommus(FEATURE_SNP)) + if (!check_feature(FEATURE_SNP)) return; /* Note: @@ -869,7 +854,7 @@ static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, void *buf = (void *)__get_free_pages(gfp, order); if (buf && - check_feature_on_all_iommus(FEATURE_SNP) && + check_feature(FEATURE_SNP) && set_memory_4k((unsigned long)buf, (1 << order))) { free_pages((unsigned long)buf, order); buf = NULL; @@ -985,14 +970,14 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_GAINT_EN); iommu_feature_enable(iommu, CONTROL_GALOG_EN); - for (i = 0; i < LOOP_TIMEOUT; ++i) { + for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) { status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); if (status & (MMIO_STATUS_GALOG_RUN_MASK)) break; udelay(10); } - if (WARN_ON(i >= LOOP_TIMEOUT)) + if (WARN_ON(i >= MMIO_STATUS_TIMEOUT)) return -EINVAL; return 0; @@ -1048,7 +1033,7 @@ static void iommu_enable_xt(struct amd_iommu *iommu) static void iommu_enable_gt(struct amd_iommu *iommu) { - if (!iommu_feature(iommu, FEATURE_GT)) + if (!check_feature(FEATURE_GT)) return; iommu_feature_enable(iommu, CONTROL_GT_EN); @@ -1987,7 +1972,7 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu) u64 val; struct pci_dev *pdev = iommu->dev; - if (!iommu_feature(iommu, FEATURE_PC)) + if (!check_feature(FEATURE_PC)) return; amd_iommu_pc_present = true; @@ -2014,8 +1999,7 @@ static ssize_t amd_iommu_show_features(struct device *dev, struct device_attribute *attr, char *buf) { - struct amd_iommu *iommu = dev_to_amd_iommu(dev); - return sysfs_emit(buf, "%llx:%llx\n", iommu->features2, iommu->features); + return sysfs_emit(buf, "%llx:%llx\n", amd_iommu_efr, amd_iommu_efr2); } static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL); @@ -2051,9 +2035,9 @@ static void __init late_iommu_features_init(struct amd_iommu *iommu) features = readq(iommu->mmio_base + MMIO_EXT_FEATURES); features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2); - if (!iommu->features) { - iommu->features = features; - iommu->features2 = features2; + if (!amd_iommu_efr) { + amd_iommu_efr = features; + amd_iommu_efr2 = features2; return; } @@ -2061,12 +2045,12 @@ static void __init late_iommu_features_init(struct amd_iommu *iommu) * Sanity check and warn if EFR values from * IVHD and MMIO conflict. */ - if (features != iommu->features || - features2 != iommu->features2) { + if (features != amd_iommu_efr || + features2 != amd_iommu_efr2) { pr_warn(FW_WARN "EFR mismatch. 
Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n", - features, iommu->features, - features2, iommu->features2); + features, amd_iommu_efr, + features2, amd_iommu_efr2); } } @@ -2092,20 +2076,17 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) late_iommu_features_init(iommu); - if (iommu_feature(iommu, FEATURE_GT)) { + if (check_feature(FEATURE_GT)) { int glxval; - u32 max_pasid; u64 pasmax; - pasmax = iommu->features & FEATURE_PASID_MASK; + pasmax = amd_iommu_efr & FEATURE_PASID_MASK; pasmax >>= FEATURE_PASID_SHIFT; - max_pasid = (1 << (pasmax + 1)) - 1; + iommu->iommu.max_pasids = (1 << (pasmax + 1)) - 1; - amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid); + BUG_ON(iommu->iommu.max_pasids & ~PASID_MASK); - BUG_ON(amd_iommu_max_pasid & ~PASID_MASK); - - glxval = iommu->features & FEATURE_GLXVAL_MASK; + glxval = amd_iommu_efr & FEATURE_GLXVAL_MASK; glxval >>= FEATURE_GLXVAL_SHIFT; if (amd_iommu_max_glx_val == -1) @@ -2114,13 +2095,7 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval); } - if (iommu_feature(iommu, FEATURE_GT) && - iommu_feature(iommu, FEATURE_PPR)) { - iommu->is_iommu_v2 = true; - amd_iommu_v2_present = true; - } - - if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu)) + if (check_feature(FEATURE_PPR) && alloc_ppr_log(iommu)) return -ENOMEM; if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) { @@ -2132,13 +2107,10 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) init_iommu_perf_ctr(iommu); if (amd_iommu_pgtable == AMD_IOMMU_V2) { - if (!iommu_feature(iommu, FEATURE_GIOSUP) || - !iommu_feature(iommu, FEATURE_GT)) { + if (!check_feature(FEATURE_GIOSUP) || + !check_feature(FEATURE_GT)) { pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n"); amd_iommu_pgtable = AMD_IOMMU_V1; - } else if (iommu_default_passthrough()) { - pr_warn("V2 page table doesn't support passthrough mode. Fallback to v1.\n"); - amd_iommu_pgtable = AMD_IOMMU_V1; } } @@ -2186,35 +2158,29 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) static void print_iommu_info(void) { + int i; static const char * const feat_str[] = { "PreF", "PPR", "X2APIC", "NX", "GT", "[5]", "IA", "GA", "HE", "PC" }; - struct amd_iommu *iommu; - - for_each_iommu(iommu) { - struct pci_dev *pdev = iommu->dev; - int i; - pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr); + if (amd_iommu_efr) { + pr_info("Extended features (%#llx, %#llx):", amd_iommu_efr, amd_iommu_efr2); - if (iommu->cap & (1 << IOMMU_CAP_EFR)) { - pr_info("Extended features (%#llx, %#llx):", iommu->features, iommu->features2); - - for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { - if (iommu_feature(iommu, (1ULL << i))) - pr_cont(" %s", feat_str[i]); - } + for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { + if (check_feature(1ULL << i)) + pr_cont(" %s", feat_str[i]); + } - if (iommu->features & FEATURE_GAM_VAPIC) - pr_cont(" GA_vAPIC"); + if (check_feature(FEATURE_GAM_VAPIC)) + pr_cont(" GA_vAPIC"); - if (iommu->features & FEATURE_SNP) - pr_cont(" SNP"); + if (check_feature(FEATURE_SNP)) + pr_cont(" SNP"); - pr_cont("\n"); - } + pr_cont("\n"); } + if (irq_remapping_enabled) { pr_info("Interrupt remapping enabled\n"); if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) @@ -2900,19 +2866,19 @@ static void enable_iommus_vapic(void) * Need to set and poll check the GALOGRun bit to zero before * we can set/ modify GA Log registers safely. 
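 *
 * With the shared MMIO_STATUS_TIMEOUT bound (2000000 iterations of
 * udelay(10)), the poll below now waits up to roughly 20 seconds
 * (2000000 * 10us) before the WARN_ON fires, matching the GA log
 * enable path in iommu_ga_log_enable().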
*/ - for (i = 0; i < LOOP_TIMEOUT; ++i) { + for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) { status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); if (!(status & MMIO_STATUS_GALOG_RUN_MASK)) break; udelay(10); } - if (WARN_ON(i >= LOOP_TIMEOUT)) + if (WARN_ON(i >= MMIO_STATUS_TIMEOUT)) return; } if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && - !check_feature_on_all_iommus(FEATURE_GAM_VAPIC)) { + !check_feature(FEATURE_GAM_VAPIC)) { amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; return; } @@ -3698,9 +3664,8 @@ bool amd_iommu_v2_supported(void) * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without * setting up IOMMUv1 page table. */ - return amd_iommu_v2_present && !amd_iommu_snp_en; + return amd_iommu_gt_ppr_supported() && !amd_iommu_snp_en; } -EXPORT_SYMBOL(amd_iommu_v2_supported); struct amd_iommu *get_amd_iommu(unsigned int idx) { @@ -3824,7 +3789,7 @@ int amd_iommu_snp_enable(void) return -EINVAL; } - amd_iommu_snp_en = check_feature_on_all_iommus(FEATURE_SNP); + amd_iommu_snp_en = check_feature(FEATURE_SNP); if (!amd_iommu_snp_en) return -EINVAL; diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index e9ef2e0a62f6..f818a7e254d4 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -363,10 +363,10 @@ static void v2_free_pgtable(struct io_pgtable *iop) if (!(pdom->flags & PD_IOMMUV2_MASK)) return; - /* - * Make changes visible to IOMMUs. No need to clear gcr3 entry - * as gcr3 table is already freed. - */ + /* Clear gcr3 entry */ + amd_iommu_domain_clear_gcr3(&pdom->domain, 0); + + /* Make changes visible to IOMMUs */ amd_iommu_domain_update(pdom); /* Free page table */ diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index b399c5741378..fcc987f5d4ed 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -45,8 +45,6 @@ #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) -#define LOOP_TIMEOUT 100000 - /* IO virtual address start page frame number */ #define IOVA_START_PFN (1) #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) @@ -68,7 +66,6 @@ LIST_HEAD(acpihid_map); const struct iommu_ops amd_iommu_ops; const struct iommu_dirty_ops amd_dirty_ops; -static ATOMIC_NOTIFIER_HEAD(ppr_notifier); int amd_iommu_max_glx_val = -1; /* @@ -81,7 +78,6 @@ struct iommu_cmd { struct kmem_cache *amd_iommu_irq_cache; static void detach_device(struct device *dev); -static int domain_enable_v2(struct protection_domain *domain, int pasids); /**************************************************************************** * @@ -324,24 +320,141 @@ static struct iommu_group *acpihid_device_group(struct device *dev) return entry->group; } -static bool pci_iommuv2_capable(struct pci_dev *pdev) +static inline bool pdev_pasid_supported(struct iommu_dev_data *dev_data) { - static const int caps[] = { - PCI_EXT_CAP_ID_PRI, - PCI_EXT_CAP_ID_PASID, - }; - int i, pos; + return (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP); +} - if (!pci_ats_supported(pdev)) - return false; +static u32 pdev_get_caps(struct pci_dev *pdev) +{ + int features; + u32 flags = 0; + + if (pci_ats_supported(pdev)) + flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP; + + if (pci_pri_supported(pdev)) + flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP; + + features = pci_pasid_features(pdev); + if (features >= 0) { + flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP; + + if (features & PCI_PASID_CAP_EXEC) + flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP; - for (i = 0; i < 2; ++i) { - pos = pci_find_ext_capability(pdev, caps[i]); - if (pos == 0) - return 
false; + if (features & PCI_PASID_CAP_PRIV) + flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP; } - return true; + return flags; +} + +static inline int pdev_enable_cap_ats(struct pci_dev *pdev) +{ + struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); + int ret = -EINVAL; + + if (dev_data->ats_enabled) + return 0; + + if (amd_iommu_iotlb_sup && + (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP)) { + ret = pci_enable_ats(pdev, PAGE_SHIFT); + if (!ret) { + dev_data->ats_enabled = 1; + dev_data->ats_qdep = pci_ats_queue_depth(pdev); + } + } + + return ret; +} + +static inline void pdev_disable_cap_ats(struct pci_dev *pdev) +{ + struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); + + if (dev_data->ats_enabled) { + pci_disable_ats(pdev); + dev_data->ats_enabled = 0; + } +} + +int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev) +{ + struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); + int ret = -EINVAL; + + if (dev_data->pri_enabled) + return 0; + + if (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) { + /* + * First reset the PRI state of the device. + * FIXME: Hardcode number of outstanding requests for now + */ + if (!pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32)) { + dev_data->pri_enabled = 1; + dev_data->pri_tlp = pci_prg_resp_pasid_required(pdev); + + ret = 0; + } + } + + return ret; +} + +void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev) +{ + struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); + + if (dev_data->pri_enabled) { + pci_disable_pri(pdev); + dev_data->pri_enabled = 0; + } +} + +static inline int pdev_enable_cap_pasid(struct pci_dev *pdev) +{ + struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); + int ret = -EINVAL; + + if (dev_data->pasid_enabled) + return 0; + + if (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) { + /* Only allow access to user-accessible pages */ + ret = pci_enable_pasid(pdev, 0); + if (!ret) + dev_data->pasid_enabled = 1; + } + + return ret; +} + +static inline void pdev_disable_cap_pasid(struct pci_dev *pdev) +{ + struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); + + if (dev_data->pasid_enabled) { + pci_disable_pasid(pdev); + dev_data->pasid_enabled = 0; + } +} + +static void pdev_enable_caps(struct pci_dev *pdev) +{ + pdev_enable_cap_ats(pdev); + pdev_enable_cap_pasid(pdev); + amd_iommu_pdev_enable_cap_pri(pdev); + +} + +static void pdev_disable_caps(struct pci_dev *pdev) +{ + pdev_disable_cap_ats(pdev); + pdev_disable_cap_pasid(pdev); + amd_iommu_pdev_disable_cap_pri(pdev); } /* @@ -401,8 +514,8 @@ static int iommu_init_device(struct amd_iommu *iommu, struct device *dev) * it'll be forced to go into translation mode. 
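 *
 * pdev_get_caps() folds the old pci_iommuv2_capable() probing into a
 * single flags word: a device exposing ATS, PRI and PASID ends up
 * with dev_data->flags == (AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
 * AMD_IOMMU_DEVICE_FLAG_PRI_SUP | AMD_IOMMU_DEVICE_FLAG_PASID_SUP),
 * i.e. 0x7, plus the EXEC/PRIV bits reported by pci_pasid_features().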
*/ if ((iommu_default_passthrough() || !amd_iommu_force_isolation) && - dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) { - dev_data->iommu_v2 = iommu->is_iommu_v2; + dev_is_pci(dev) && amd_iommu_gt_ppr_supported()) { + dev_data->flags = pdev_get_caps(to_pci_dev(dev)); } dev_iommu_priv_set(dev, dev_data); @@ -703,24 +816,6 @@ static void iommu_poll_events(struct amd_iommu *iommu) writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); } -static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw) -{ - struct amd_iommu_fault fault; - - if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) { - pr_err_ratelimited("Unknown PPR request received\n"); - return; - } - - fault.address = raw[1]; - fault.pasid = PPR_PASID(raw[0]); - fault.sbdf = PCI_SEG_DEVID_TO_SBDF(iommu->pci_seg->id, PPR_DEVID(raw[0])); - fault.tag = PPR_TAG(raw[0]); - fault.flags = PPR_FLAGS(raw[0]); - - atomic_notifier_call_chain(&ppr_notifier, 0, &fault); -} - static void iommu_poll_ppr_log(struct amd_iommu *iommu) { u32 head, tail; @@ -766,8 +861,7 @@ static void iommu_poll_ppr_log(struct amd_iommu *iommu) head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE; writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); - /* Handle PPR entry */ - iommu_handle_ppr_entry(iommu, entry); + /* TODO: PPR Handler will be added when we add IOPF support */ /* Refresh ring-buffer information */ head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); @@ -1096,7 +1190,7 @@ static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, u32 pasid, } static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, u32 pasid, - int status, int tag, bool gn) + int status, int tag, u8 gn) { memset(cmd, 0, sizeof(*cmd)); @@ -1300,7 +1394,7 @@ static void amd_iommu_flush_irt_all(struct amd_iommu *iommu) void iommu_flush_all_caches(struct amd_iommu *iommu) { - if (iommu_feature(iommu, FEATURE_IA)) { + if (check_feature(FEATURE_IA)) { amd_iommu_flush_all(iommu); } else { amd_iommu_flush_dte_all(iommu); @@ -1319,7 +1413,7 @@ static int device_flush_iotlb(struct iommu_dev_data *dev_data, struct iommu_cmd cmd; int qdep; - qdep = dev_data->ats.qdep; + qdep = dev_data->ats_qdep; iommu = rlookup_amd_iommu(dev_data->dev); if (!iommu) return -EINVAL; @@ -1370,7 +1464,7 @@ static int device_flush_dte(struct iommu_dev_data *dev_data) return ret; } - if (dev_data->ats.enabled) + if (dev_data->ats_enabled) ret = device_flush_iotlb(dev_data, 0, ~0UL); return ret; @@ -1403,7 +1497,7 @@ static void __domain_flush_pages(struct protection_domain *domain, list_for_each_entry(dev_data, &domain->dev_list, list) { - if (!dev_data->ats.enabled) + if (!dev_data->ats_enabled) continue; ret |= device_flush_iotlb(dev_data, address, size); @@ -1579,6 +1673,42 @@ static void free_gcr3_table(struct protection_domain *domain) free_page((unsigned long)domain->gcr3_tbl); } +/* + * Number of GCR3 table levels required. Each level must be a 4-Kbyte + * page and can contain up to 512 entries. + */ +static int get_gcr3_levels(int pasids) +{ + int levels; + + if (pasids == -1) + return amd_iommu_max_glx_val; + + levels = get_count_order(pasids); + + return levels ? (DIV_ROUND_UP(levels, 9) - 1) : levels; +} + +/* Note: This function expects iommu_domain->lock to be held prior to calling the function.
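 *
 * Worked example for get_gcr3_levels() above, assuming
 * get_count_order() returns ceil(log2(pasids)): pasids == 1 gives
 * order 0, so level 0 (one 4-Kbyte table); pasids == 512 gives order
 * 9 and DIV_ROUND_UP(9, 9) - 1 == 0, still a single level; a full
 * 20-bit PASID space gives order 20 and DIV_ROUND_UP(20, 9) - 1 == 2,
 * i.e. a three-level GCR3 walk.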
*/ +static int setup_gcr3_table(struct protection_domain *domain, int pasids) +{ + int levels = get_gcr3_levels(pasids); + + if (levels > amd_iommu_max_glx_val) + return -EINVAL; + + domain->gcr3_tbl = alloc_pgtable_page(domain->nid, GFP_ATOMIC); + if (domain->gcr3_tbl == NULL) + return -ENOMEM; + + domain->glx = levels; + domain->flags |= PD_IOMMUV2_MASK; + + amd_iommu_domain_update(domain); + + return 0; +} + static void set_dte_entry(struct amd_iommu *iommu, u16 devid, struct protection_domain *domain, bool ats, bool ppr) { @@ -1607,10 +1737,8 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid, if (ats) flags |= DTE_FLAG_IOTLB; - if (ppr) { - if (iommu_feature(iommu, FEATURE_EPHSUP)) - pte_root |= 1ULL << DEV_ENTRY_PPR; - } + if (ppr) + pte_root |= 1ULL << DEV_ENTRY_PPR; if (domain->dirty_tracking) pte_root |= DTE_FLAG_HAD; @@ -1690,7 +1818,7 @@ static void do_attach(struct iommu_dev_data *dev_data, iommu = rlookup_amd_iommu(dev_data->dev); if (!iommu) return; - ats = dev_data->ats.enabled; + ats = dev_data->ats_enabled; /* Update data structures */ dev_data->domain = domain; @@ -1706,7 +1834,7 @@ static void do_attach(struct iommu_dev_data *dev_data, /* Update device table */ set_dte_entry(iommu, dev_data->devid, domain, - ats, dev_data->iommu_v2); + ats, dev_data->ppr); clone_aliases(iommu, dev_data->dev); device_flush_dte(dev_data); @@ -1741,48 +1869,6 @@ static void do_detach(struct iommu_dev_data *dev_data) domain->dev_cnt -= 1; } -static void pdev_iommuv2_disable(struct pci_dev *pdev) -{ - pci_disable_ats(pdev); - pci_disable_pri(pdev); - pci_disable_pasid(pdev); -} - -static int pdev_pri_ats_enable(struct pci_dev *pdev) -{ - int ret; - - /* Only allow access to user-accessible pages */ - ret = pci_enable_pasid(pdev, 0); - if (ret) - return ret; - - /* First reset the PRI state of the device */ - ret = pci_reset_pri(pdev); - if (ret) - goto out_err_pasid; - - /* Enable PRI */ - /* FIXME: Hardcode number of outstanding requests for now */ - ret = pci_enable_pri(pdev, 32); - if (ret) - goto out_err_pasid; - - ret = pci_enable_ats(pdev, PAGE_SHIFT); - if (ret) - goto out_err_pri; - - return 0; - -out_err_pri: - pci_disable_pri(pdev); - -out_err_pasid: - pci_disable_pasid(pdev); - - return ret; -} - /* * If a device is not yet associated with a domain, this function makes the * device visible in the domain @@ -1791,9 +1877,8 @@ static int attach_device(struct device *dev, struct protection_domain *domain) { struct iommu_dev_data *dev_data; - struct pci_dev *pdev; unsigned long flags; - int ret; + int ret = 0; spin_lock_irqsave(&domain->lock, flags); @@ -1801,45 +1886,13 @@ static int attach_device(struct device *dev, spin_lock(&dev_data->lock); - ret = -EBUSY; - if (dev_data->domain != NULL) + if (dev_data->domain != NULL) { + ret = -EBUSY; goto out; - - if (!dev_is_pci(dev)) - goto skip_ats_check; - - pdev = to_pci_dev(dev); - if (domain->flags & PD_IOMMUV2_MASK) { - struct iommu_domain *def_domain = iommu_get_dma_domain(dev); - - ret = -EINVAL; - - /* - * In case of using AMD_IOMMU_V1 page table mode and the device - * is enabling for PPR/ATS support (using v2 table), - * we need to make sure that the domain type is identity map. 
- */ - if ((amd_iommu_pgtable == AMD_IOMMU_V1) && - def_domain->type != IOMMU_DOMAIN_IDENTITY) { - goto out; - } - - if (dev_data->iommu_v2) { - if (pdev_pri_ats_enable(pdev) != 0) - goto out; - - dev_data->ats.enabled = true; - dev_data->ats.qdep = pci_ats_queue_depth(pdev); - dev_data->pri_tlp = pci_prg_resp_pasid_required(pdev); - } - } else if (amd_iommu_iotlb_sup && - pci_enable_ats(pdev, PAGE_SHIFT) == 0) { - dev_data->ats.enabled = true; - dev_data->ats.qdep = pci_ats_queue_depth(pdev); } -skip_ats_check: - ret = 0; + if (dev_is_pci(dev)) + pdev_enable_caps(to_pci_dev(dev)); do_attach(dev_data, domain); @@ -1887,15 +1940,8 @@ static void detach_device(struct device *dev) do_detach(dev_data); - if (!dev_is_pci(dev)) - goto out; - - if (domain->flags & PD_IOMMUV2_MASK && dev_data->iommu_v2) - pdev_iommuv2_disable(to_pci_dev(dev)); - else if (dev_data->ats.enabled) - pci_disable_ats(to_pci_dev(dev)); - - dev_data->ats.enabled = false; + if (dev_is_pci(dev)) + pdev_disable_caps(to_pci_dev(dev)); out: spin_unlock(&dev_data->lock); @@ -1985,7 +2031,7 @@ static void update_device_table(struct protection_domain *domain) if (!iommu) continue; set_dte_entry(iommu, dev_data->devid, domain, - dev_data->ats.enabled, dev_data->iommu_v2); + dev_data->ats_enabled, dev_data->ppr); clone_aliases(iommu, dev_data->dev); } } @@ -2019,9 +2065,11 @@ void amd_iommu_domain_update(struct protection_domain *domain) static void cleanup_domain(struct protection_domain *domain) { struct iommu_dev_data *entry; - unsigned long flags; - spin_lock_irqsave(&domain->lock, flags); + lockdep_assert_held(&domain->lock); + + if (!domain->dev_cnt) + return; while (!list_empty(&domain->dev_list)) { entry = list_first_entry(&domain->dev_list, @@ -2029,8 +2077,7 @@ static void cleanup_domain(struct protection_domain *domain) BUG_ON(!entry->domain); do_detach(entry); } - - spin_unlock_irqrestore(&domain->lock, flags); + WARN_ON(domain->dev_cnt != 0); } static void protection_domain_free(struct protection_domain *domain) @@ -2041,6 +2088,12 @@ static void protection_domain_free(struct protection_domain *domain) if (domain->iop.pgtbl_cfg.tlb) free_io_pgtable_ops(&domain->iop.iop.ops); + if (domain->flags & PD_IOMMUV2_MASK) + free_gcr3_table(domain); + + if (domain->iop.root) + free_page((unsigned long)domain->iop.root); + if (domain->id) domain_id_free(domain->id); @@ -2053,18 +2106,10 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode) BUG_ON(mode < PAGE_MODE_NONE || mode > PAGE_MODE_6_LEVEL); - spin_lock_init(&domain->lock); - domain->id = domain_id_alloc(); - if (!domain->id) - return -ENOMEM; - INIT_LIST_HEAD(&domain->dev_list); - if (mode != PAGE_MODE_NONE) { pt_root = (void *)get_zeroed_page(GFP_KERNEL); - if (!pt_root) { - domain_id_free(domain->id); + if (!pt_root) return -ENOMEM; - } } amd_iommu_domain_set_pgtable(domain, pt_root, mode); @@ -2074,20 +2119,12 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode) static int protection_domain_init_v2(struct protection_domain *domain) { - spin_lock_init(&domain->lock); - domain->id = domain_id_alloc(); - if (!domain->id) - return -ENOMEM; - INIT_LIST_HEAD(&domain->dev_list); - domain->flags |= PD_GIOV_MASK; domain->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2; - if (domain_enable_v2(domain, 1)) { - domain_id_free(domain->id); + if (setup_gcr3_table(domain, 1)) return -ENOMEM; - } return 0; } @@ -2097,57 +2134,60 @@ static struct protection_domain *protection_domain_alloc(unsigned int type) struct io_pgtable_ops 
*pgtbl_ops; struct protection_domain *domain; int pgtable; - int mode = DEFAULT_PGTABLE_LEVEL; int ret; + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + return NULL; + + domain->id = domain_id_alloc(); + if (!domain->id) + goto out_err; + + spin_lock_init(&domain->lock); + INIT_LIST_HEAD(&domain->dev_list); + domain->nid = NUMA_NO_NODE; + + switch (type) { + /* No need to allocate io pgtable ops in passthrough mode */ + case IOMMU_DOMAIN_IDENTITY: + return domain; + case IOMMU_DOMAIN_DMA: + pgtable = amd_iommu_pgtable; + break; /* - * Force IOMMU v1 page table when iommu=pt and - * when allocating domain for pass-through devices. + * Force IOMMU v1 page table when allocating + * domain for pass-through devices. */ - if (type == IOMMU_DOMAIN_IDENTITY) { - pgtable = AMD_IOMMU_V1; - mode = PAGE_MODE_NONE; - } else if (type == IOMMU_DOMAIN_UNMANAGED) { + case IOMMU_DOMAIN_UNMANAGED: pgtable = AMD_IOMMU_V1; - } else if (type == IOMMU_DOMAIN_DMA || type == IOMMU_DOMAIN_DMA_FQ) { - pgtable = amd_iommu_pgtable; - } else { - return NULL; + break; + default: + goto out_err; } - domain = kzalloc(sizeof(*domain), GFP_KERNEL); - if (!domain) - return NULL; - switch (pgtable) { case AMD_IOMMU_V1: - ret = protection_domain_init_v1(domain, mode); + ret = protection_domain_init_v1(domain, DEFAULT_PGTABLE_LEVEL); break; case AMD_IOMMU_V2: ret = protection_domain_init_v2(domain); break; default: ret = -EINVAL; + break; } if (ret) goto out_err; - /* No need to allocate io pgtable ops in passthrough mode */ - if (type == IOMMU_DOMAIN_IDENTITY) - return domain; - - domain->nid = NUMA_NO_NODE; - pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain); - if (!pgtbl_ops) { - domain_id_free(domain->id); + if (!pgtbl_ops) goto out_err; - } return domain; out_err: - kfree(domain); + protection_domain_free(domain); return NULL; } @@ -2236,19 +2276,18 @@ amd_iommu_domain_alloc_user(struct device *dev, u32 flags, static void amd_iommu_domain_free(struct iommu_domain *dom) { struct protection_domain *domain; + unsigned long flags; - domain = to_pdomain(dom); + if (!dom) + return; - if (domain->dev_cnt > 0) - cleanup_domain(domain); + domain = to_pdomain(dom); - BUG_ON(domain->dev_cnt != 0); + spin_lock_irqsave(&domain->lock, flags); - if (!dom) - return; + cleanup_domain(domain); - if (domain->flags & PD_IOMMUV2_MASK) - free_gcr3_table(domain); + spin_unlock_irqrestore(&domain->lock, flags); protection_domain_free(domain); } @@ -2296,14 +2335,15 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, return ret; } -static void amd_iommu_iotlb_sync_map(struct iommu_domain *dom, - unsigned long iova, size_t size) +static int amd_iommu_iotlb_sync_map(struct iommu_domain *dom, + unsigned long iova, size_t size) { struct protection_domain *domain = to_pdomain(dom); struct io_pgtable_ops *ops = &domain->iop.iop.ops; if (ops->map_pages) domain_flush_np_cache(domain, iova, size); + return 0; } static int amd_iommu_map_pages(struct iommu_domain *dom, unsigned long iova, @@ -2541,7 +2581,6 @@ bool amd_iommu_is_attach_deferred(struct device *dev) return dev_data->defer_attach; } -EXPORT_SYMBOL_GPL(amd_iommu_is_attach_deferred); static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain) { @@ -2581,7 +2620,7 @@ static int amd_iommu_def_domain_type(struct device *dev) * and require remapping. * - SNP is enabled, because it prohibits DTE[Mode]=0. 
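 *
 * Net effect of the check below: only a PASID-capable device on a
 * host with neither memory encryption (CC_ATTR_MEM_ENCRYPT) nor SNP
 * active is steered to IOMMU_DOMAIN_IDENTITY; every other device
 * falls back to the core's default domain type.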
*/ - if (dev_data->iommu_v2 && + if (pdev_pasid_supported(dev_data) && !cc_platform_has(CC_ATTR_MEM_ENCRYPT) && !amd_iommu_snp_en) { return IOMMU_DOMAIN_IDENTITY; @@ -2626,93 +2665,6 @@ const struct iommu_ops amd_iommu_ops = { } }; -/***************************************************************************** - * - * The next functions do a basic initialization of IOMMU for pass through - * mode - * - * In passthrough mode the IOMMU is initialized and enabled but not used for - * DMA-API translation. - * - *****************************************************************************/ - -/* IOMMUv2 specific functions */ -int amd_iommu_register_ppr_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_register(&ppr_notifier, nb); -} -EXPORT_SYMBOL(amd_iommu_register_ppr_notifier); - -int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(&ppr_notifier, nb); -} -EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier); - -void amd_iommu_domain_direct_map(struct iommu_domain *dom) -{ - struct protection_domain *domain = to_pdomain(dom); - unsigned long flags; - - spin_lock_irqsave(&domain->lock, flags); - - if (domain->iop.pgtbl_cfg.tlb) - free_io_pgtable_ops(&domain->iop.iop.ops); - - spin_unlock_irqrestore(&domain->lock, flags); -} -EXPORT_SYMBOL(amd_iommu_domain_direct_map); - -/* Note: This function expects iommu_domain->lock to be held prior calling the function. */ -static int domain_enable_v2(struct protection_domain *domain, int pasids) -{ - int levels; - - /* Number of GCR3 table levels required */ - for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9) - levels += 1; - - if (levels > amd_iommu_max_glx_val) - return -EINVAL; - - domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC); - if (domain->gcr3_tbl == NULL) - return -ENOMEM; - - domain->glx = levels; - domain->flags |= PD_IOMMUV2_MASK; - - amd_iommu_domain_update(domain); - - return 0; -} - -int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) -{ - struct protection_domain *pdom = to_pdomain(dom); - unsigned long flags; - int ret; - - spin_lock_irqsave(&pdom->lock, flags); - - /* - * Save us all sanity checks whether devices already in the - * domain support IOMMUv2. Just force that the domain has no - * devices attached when it is switched into IOMMUv2 mode. - */ - ret = -EBUSY; - if (pdom->dev_cnt > 0 || pdom->flags & PD_IOMMUV2_MASK) - goto out; - - if (!pdom->gcr3_tbl) - ret = domain_enable_v2(pdom, pasids); - -out: - spin_unlock_irqrestore(&pdom->lock, flags); - return ret; -} -EXPORT_SYMBOL(amd_iommu_domain_enable_v2); - static int __flush_pasid(struct protection_domain *domain, u32 pasid, u64 address, bool size) { @@ -2750,10 +2702,10 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid, There might be non-IOMMUv2 capable devices in an IOMMUv2 * domain. 
*/ - if (!dev_data->ats.enabled) + if (!dev_data->ats_enabled) continue; - qdep = dev_data->ats.qdep; + qdep = dev_data->ats_qdep; iommu = rlookup_amd_iommu(dev_data->dev); if (!iommu) continue; @@ -2794,7 +2746,6 @@ int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, return ret; } -EXPORT_SYMBOL(amd_iommu_flush_page); static int __amd_iommu_flush_tlb(struct protection_domain *domain, u32 pasid) { @@ -2814,7 +2765,6 @@ int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid) return ret; } -EXPORT_SYMBOL(amd_iommu_flush_tlb); static u64 *__get_gcr3_pte(u64 *root, int level, u32 pasid, bool alloc) { @@ -2894,7 +2844,6 @@ int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid, return ret; } -EXPORT_SYMBOL(amd_iommu_domain_set_gcr3); int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid) { @@ -2908,7 +2857,6 @@ int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid) return ret; } -EXPORT_SYMBOL(amd_iommu_domain_clear_gcr3); int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, int status, int tag) @@ -2927,49 +2875,6 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, return iommu_queue_command(iommu, &cmd); } -EXPORT_SYMBOL(amd_iommu_complete_ppr); - -int amd_iommu_device_info(struct pci_dev *pdev, - struct amd_iommu_device_info *info) -{ - int max_pasids; - int pos; - - if (pdev == NULL || info == NULL) - return -EINVAL; - - if (!amd_iommu_v2_supported()) - return -EINVAL; - - memset(info, 0, sizeof(*info)); - - if (pci_ats_supported(pdev)) - info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP; - - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); - if (pos) - info->flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP; - - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID); - if (pos) { - int features; - - max_pasids = 1 << (9 * (amd_iommu_max_glx_val + 1)); - max_pasids = min(max_pasids, (1 << 20)); - - info->flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP; - info->max_pasids = min(pci_max_pasids(pdev), max_pasids); - - features = pci_pasid_features(pdev); - if (features & PCI_PASID_CAP_EXEC) - info->flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP; - if (features & PCI_PASID_CAP_PRIV) - info->flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP; - } - - return 0; -} -EXPORT_SYMBOL(amd_iommu_device_info); #ifdef CONFIG_IRQ_REMAP diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c deleted file mode 100644 index 57c2fb1146e2..000000000000 --- a/drivers/iommu/amd/iommu_v2.c +++ /dev/null @@ -1,996 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2010-2012 Advanced Micro Devices, Inc. 
- * Author: Joerg Roedel <jroedel@suse.de> - */ - -#define pr_fmt(fmt) "AMD-Vi: " fmt - -#include <linux/refcount.h> -#include <linux/mmu_notifier.h> -#include <linux/amd-iommu.h> -#include <linux/mm_types.h> -#include <linux/profile.h> -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/sched/mm.h> -#include <linux/wait.h> -#include <linux/pci.h> -#include <linux/gfp.h> -#include <linux/cc_platform.h> - -#include "amd_iommu.h" - -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Joerg Roedel <jroedel@suse.de>"); - -#define PRI_QUEUE_SIZE 512 - -struct pri_queue { - atomic_t inflight; - bool finish; - int status; -}; - -struct pasid_state { - struct list_head list; /* For global state-list */ - refcount_t count; /* Reference count */ - unsigned mmu_notifier_count; /* Counting nested mmu_notifier - calls */ - struct mm_struct *mm; /* mm_struct for the faults */ - struct mmu_notifier mn; /* mmu_notifier handle */ - struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */ - struct device_state *device_state; /* Link to our device_state */ - u32 pasid; /* PASID index */ - bool invalid; /* Used during setup and - teardown of the pasid */ - spinlock_t lock; /* Protect pri_queues and - mmu_notifer_count */ - wait_queue_head_t wq; /* To wait for count == 0 */ -}; - -struct device_state { - struct list_head list; - u32 sbdf; - atomic_t count; - struct pci_dev *pdev; - struct pasid_state **states; - struct iommu_domain *domain; - int pasid_levels; - int max_pasids; - amd_iommu_invalid_ppr_cb inv_ppr_cb; - amd_iommu_invalidate_ctx inv_ctx_cb; - spinlock_t lock; - wait_queue_head_t wq; -}; - -struct fault { - struct work_struct work; - struct device_state *dev_state; - struct pasid_state *state; - struct mm_struct *mm; - u64 address; - u32 pasid; - u16 tag; - u16 finish; - u16 flags; -}; - -static LIST_HEAD(state_list); -static DEFINE_SPINLOCK(state_lock); - -static struct workqueue_struct *iommu_wq; - -static void free_pasid_states(struct device_state *dev_state); - -static struct device_state *__get_device_state(u32 sbdf) -{ - struct device_state *dev_state; - - list_for_each_entry(dev_state, &state_list, list) { - if (dev_state->sbdf == sbdf) - return dev_state; - } - - return NULL; -} - -static struct device_state *get_device_state(u32 sbdf) -{ - struct device_state *dev_state; - unsigned long flags; - - spin_lock_irqsave(&state_lock, flags); - dev_state = __get_device_state(sbdf); - if (dev_state != NULL) - atomic_inc(&dev_state->count); - spin_unlock_irqrestore(&state_lock, flags); - - return dev_state; -} - -static void free_device_state(struct device_state *dev_state) -{ - struct iommu_group *group; - - /* Get rid of any remaining pasid states */ - free_pasid_states(dev_state); - - /* - * Wait until the last reference is dropped before freeing - * the device state. - */ - wait_event(dev_state->wq, !atomic_read(&dev_state->count)); - - /* - * First detach device from domain - No more PRI requests will arrive - * from that device after it is unbound from the IOMMUv2 domain. 
- */ - group = iommu_group_get(&dev_state->pdev->dev); - if (WARN_ON(!group)) - return; - - iommu_detach_group(dev_state->domain, group); - - iommu_group_put(group); - - /* Everything is down now, free the IOMMUv2 domain */ - iommu_domain_free(dev_state->domain); - - /* Finally get rid of the device-state */ - kfree(dev_state); -} - -static void put_device_state(struct device_state *dev_state) -{ - if (atomic_dec_and_test(&dev_state->count)) - wake_up(&dev_state->wq); -} - -/* Must be called under dev_state->lock */ -static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state, - u32 pasid, bool alloc) -{ - struct pasid_state **root, **ptr; - int level, index; - - level = dev_state->pasid_levels; - root = dev_state->states; - - while (true) { - - index = (pasid >> (9 * level)) & 0x1ff; - ptr = &root[index]; - - if (level == 0) - break; - - if (*ptr == NULL) { - if (!alloc) - return NULL; - - *ptr = (void *)get_zeroed_page(GFP_ATOMIC); - if (*ptr == NULL) - return NULL; - } - - root = (struct pasid_state **)*ptr; - level -= 1; - } - - return ptr; -} - -static int set_pasid_state(struct device_state *dev_state, - struct pasid_state *pasid_state, - u32 pasid) -{ - struct pasid_state **ptr; - unsigned long flags; - int ret; - - spin_lock_irqsave(&dev_state->lock, flags); - ptr = __get_pasid_state_ptr(dev_state, pasid, true); - - ret = -ENOMEM; - if (ptr == NULL) - goto out_unlock; - - ret = -ENOMEM; - if (*ptr != NULL) - goto out_unlock; - - *ptr = pasid_state; - - ret = 0; - -out_unlock: - spin_unlock_irqrestore(&dev_state->lock, flags); - - return ret; -} - -static void clear_pasid_state(struct device_state *dev_state, u32 pasid) -{ - struct pasid_state **ptr; - unsigned long flags; - - spin_lock_irqsave(&dev_state->lock, flags); - ptr = __get_pasid_state_ptr(dev_state, pasid, true); - - if (ptr == NULL) - goto out_unlock; - - *ptr = NULL; - -out_unlock: - spin_unlock_irqrestore(&dev_state->lock, flags); -} - -static struct pasid_state *get_pasid_state(struct device_state *dev_state, - u32 pasid) -{ - struct pasid_state **ptr, *ret = NULL; - unsigned long flags; - - spin_lock_irqsave(&dev_state->lock, flags); - ptr = __get_pasid_state_ptr(dev_state, pasid, false); - - if (ptr == NULL) - goto out_unlock; - - ret = *ptr; - if (ret) - refcount_inc(&ret->count); - -out_unlock: - spin_unlock_irqrestore(&dev_state->lock, flags); - - return ret; -} - -static void free_pasid_state(struct pasid_state *pasid_state) -{ - kfree(pasid_state); -} - -static void put_pasid_state(struct pasid_state *pasid_state) -{ - if (refcount_dec_and_test(&pasid_state->count)) - wake_up(&pasid_state->wq); -} - -static void put_pasid_state_wait(struct pasid_state *pasid_state) -{ - if (!refcount_dec_and_test(&pasid_state->count)) - wait_event(pasid_state->wq, !refcount_read(&pasid_state->count)); - free_pasid_state(pasid_state); -} - -static void unbind_pasid(struct pasid_state *pasid_state) -{ - struct iommu_domain *domain; - - domain = pasid_state->device_state->domain; - - /* - * Mark pasid_state as invalid, no more faults will we added to the - * work queue after this is visible everywhere. 
- */ - pasid_state->invalid = true; - - /* Make sure this is visible */ - smp_wmb(); - - /* After this the device/pasid can't access the mm anymore */ - amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid); - - /* Make sure no more pending faults are in the queue */ - flush_workqueue(iommu_wq); -} - -static void free_pasid_states_level1(struct pasid_state **tbl) -{ - int i; - - for (i = 0; i < 512; ++i) { - if (tbl[i] == NULL) - continue; - - free_page((unsigned long)tbl[i]); - } -} - -static void free_pasid_states_level2(struct pasid_state **tbl) -{ - struct pasid_state **ptr; - int i; - - for (i = 0; i < 512; ++i) { - if (tbl[i] == NULL) - continue; - - ptr = (struct pasid_state **)tbl[i]; - free_pasid_states_level1(ptr); - } -} - -static void free_pasid_states(struct device_state *dev_state) -{ - struct pasid_state *pasid_state; - int i; - - for (i = 0; i < dev_state->max_pasids; ++i) { - pasid_state = get_pasid_state(dev_state, i); - if (pasid_state == NULL) - continue; - - put_pasid_state(pasid_state); - - /* Clear the pasid state so that the pasid can be re-used */ - clear_pasid_state(dev_state, pasid_state->pasid); - - /* - * This will call the mn_release function and - * unbind the PASID - */ - mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); - - put_pasid_state_wait(pasid_state); /* Reference taken in - amd_iommu_bind_pasid */ - - /* Drop reference taken in amd_iommu_bind_pasid */ - put_device_state(dev_state); - } - - if (dev_state->pasid_levels == 2) - free_pasid_states_level2(dev_state->states); - else if (dev_state->pasid_levels == 1) - free_pasid_states_level1(dev_state->states); - else - BUG_ON(dev_state->pasid_levels != 0); - - free_page((unsigned long)dev_state->states); -} - -static struct pasid_state *mn_to_state(struct mmu_notifier *mn) -{ - return container_of(mn, struct pasid_state, mn); -} - -static void mn_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - struct pasid_state *pasid_state; - struct device_state *dev_state; - - pasid_state = mn_to_state(mn); - dev_state = pasid_state->device_state; - - if ((start ^ (end - 1)) < PAGE_SIZE) - amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, - start); - else - amd_iommu_flush_tlb(dev_state->domain, pasid_state->pasid); -} - -static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm) -{ - struct pasid_state *pasid_state; - struct device_state *dev_state; - bool run_inv_ctx_cb; - - might_sleep(); - - pasid_state = mn_to_state(mn); - dev_state = pasid_state->device_state; - run_inv_ctx_cb = !pasid_state->invalid; - - if (run_inv_ctx_cb && dev_state->inv_ctx_cb) - dev_state->inv_ctx_cb(dev_state->pdev, pasid_state->pasid); - - unbind_pasid(pasid_state); -} - -static const struct mmu_notifier_ops iommu_mn = { - .release = mn_release, - .arch_invalidate_secondary_tlbs = mn_arch_invalidate_secondary_tlbs, -}; - -static void set_pri_tag_status(struct pasid_state *pasid_state, - u16 tag, int status) -{ - unsigned long flags; - - spin_lock_irqsave(&pasid_state->lock, flags); - pasid_state->pri[tag].status = status; - spin_unlock_irqrestore(&pasid_state->lock, flags); -} - -static void finish_pri_tag(struct device_state *dev_state, - struct pasid_state *pasid_state, - u16 tag) -{ - unsigned long flags; - - spin_lock_irqsave(&pasid_state->lock, flags); - if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) && - pasid_state->pri[tag].finish) { - amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid, - 
pasid_state->pri[tag].status, tag); - pasid_state->pri[tag].finish = false; - pasid_state->pri[tag].status = PPR_SUCCESS; - } - spin_unlock_irqrestore(&pasid_state->lock, flags); -} - -static void handle_fault_error(struct fault *fault) -{ - int status; - - if (!fault->dev_state->inv_ppr_cb) { - set_pri_tag_status(fault->state, fault->tag, PPR_INVALID); - return; - } - - status = fault->dev_state->inv_ppr_cb(fault->dev_state->pdev, - fault->pasid, - fault->address, - fault->flags); - switch (status) { - case AMD_IOMMU_INV_PRI_RSP_SUCCESS: - set_pri_tag_status(fault->state, fault->tag, PPR_SUCCESS); - break; - case AMD_IOMMU_INV_PRI_RSP_INVALID: - set_pri_tag_status(fault->state, fault->tag, PPR_INVALID); - break; - case AMD_IOMMU_INV_PRI_RSP_FAIL: - set_pri_tag_status(fault->state, fault->tag, PPR_FAILURE); - break; - default: - BUG(); - } -} - -static bool access_error(struct vm_area_struct *vma, struct fault *fault) -{ - unsigned long requested = 0; - - if (fault->flags & PPR_FAULT_EXEC) - requested |= VM_EXEC; - - if (fault->flags & PPR_FAULT_READ) - requested |= VM_READ; - - if (fault->flags & PPR_FAULT_WRITE) - requested |= VM_WRITE; - - return (requested & ~vma->vm_flags) != 0; -} - -static void do_fault(struct work_struct *work) -{ - struct fault *fault = container_of(work, struct fault, work); - struct vm_area_struct *vma; - vm_fault_t ret = VM_FAULT_ERROR; - unsigned int flags = 0; - struct mm_struct *mm; - u64 address; - - mm = fault->state->mm; - address = fault->address; - - if (fault->flags & PPR_FAULT_USER) - flags |= FAULT_FLAG_USER; - if (fault->flags & PPR_FAULT_WRITE) - flags |= FAULT_FLAG_WRITE; - flags |= FAULT_FLAG_REMOTE; - - mmap_read_lock(mm); - vma = vma_lookup(mm, address); - if (!vma) - /* failed to get a vma in the right range */ - goto out; - - /* Check if we have the right permissions on the vma */ - if (access_error(vma, fault)) - goto out; - - ret = handle_mm_fault(vma, address, flags, NULL); -out: - mmap_read_unlock(mm); - - if (ret & VM_FAULT_ERROR) - /* failed to service fault */ - handle_fault_error(fault); - - finish_pri_tag(fault->dev_state, fault->state, fault->tag); - - put_pasid_state(fault->state); - - kfree(fault); -} - -static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) -{ - struct amd_iommu_fault *iommu_fault; - struct pasid_state *pasid_state; - struct device_state *dev_state; - struct pci_dev *pdev = NULL; - unsigned long flags; - struct fault *fault; - bool finish; - u16 tag, devid, seg_id; - int ret; - - iommu_fault = data; - tag = iommu_fault->tag & 0x1ff; - finish = (iommu_fault->tag >> 9) & 1; - - seg_id = PCI_SBDF_TO_SEGID(iommu_fault->sbdf); - devid = PCI_SBDF_TO_DEVID(iommu_fault->sbdf); - pdev = pci_get_domain_bus_and_slot(seg_id, PCI_BUS_NUM(devid), - devid & 0xff); - if (!pdev) - return -ENODEV; - - ret = NOTIFY_DONE; - - /* In kdump kernel pci dev is not initialized yet -> send INVALID */ - if (amd_iommu_is_attach_deferred(&pdev->dev)) { - amd_iommu_complete_ppr(pdev, iommu_fault->pasid, - PPR_INVALID, tag); - goto out; - } - - dev_state = get_device_state(iommu_fault->sbdf); - if (dev_state == NULL) - goto out; - - pasid_state = get_pasid_state(dev_state, iommu_fault->pasid); - if (pasid_state == NULL || pasid_state->invalid) { - /* We know the device but not the PASID -> send INVALID */ - amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid, - PPR_INVALID, tag); - goto out_drop_state; - } - - spin_lock_irqsave(&pasid_state->lock, flags); - atomic_inc(&pasid_state->pri[tag].inflight); - if 
(finish) - pasid_state->pri[tag].finish = true; - spin_unlock_irqrestore(&pasid_state->lock, flags); - - fault = kzalloc(sizeof(*fault), GFP_ATOMIC); - if (fault == NULL) { - /* We are OOM - send success and let the device re-fault */ - finish_pri_tag(dev_state, pasid_state, tag); - goto out_drop_state; - } - - fault->dev_state = dev_state; - fault->address = iommu_fault->address; - fault->state = pasid_state; - fault->tag = tag; - fault->finish = finish; - fault->pasid = iommu_fault->pasid; - fault->flags = iommu_fault->flags; - INIT_WORK(&fault->work, do_fault); - - queue_work(iommu_wq, &fault->work); - - ret = NOTIFY_OK; - -out_drop_state: - - if (ret != NOTIFY_OK && pasid_state) - put_pasid_state(pasid_state); - - put_device_state(dev_state); - -out: - pci_dev_put(pdev); - return ret; -} - -static struct notifier_block ppr_nb = { - .notifier_call = ppr_notifier, -}; - -int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid, - struct task_struct *task) -{ - struct pasid_state *pasid_state; - struct device_state *dev_state; - struct mm_struct *mm; - u32 sbdf; - int ret; - - might_sleep(); - - if (!amd_iommu_v2_supported()) - return -ENODEV; - - sbdf = get_pci_sbdf_id(pdev); - dev_state = get_device_state(sbdf); - - if (dev_state == NULL) - return -EINVAL; - - ret = -EINVAL; - if (pasid >= dev_state->max_pasids) - goto out; - - ret = -ENOMEM; - pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL); - if (pasid_state == NULL) - goto out; - - - refcount_set(&pasid_state->count, 1); - init_waitqueue_head(&pasid_state->wq); - spin_lock_init(&pasid_state->lock); - - mm = get_task_mm(task); - pasid_state->mm = mm; - pasid_state->device_state = dev_state; - pasid_state->pasid = pasid; - pasid_state->invalid = true; /* Mark as valid only if we are - done with setting up the pasid */ - pasid_state->mn.ops = &iommu_mn; - - if (pasid_state->mm == NULL) - goto out_free; - - ret = mmu_notifier_register(&pasid_state->mn, mm); - if (ret) - goto out_free; - - ret = set_pasid_state(dev_state, pasid_state, pasid); - if (ret) - goto out_unregister; - - ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid, - __pa(pasid_state->mm->pgd)); - if (ret) - goto out_clear_state; - - /* Now we are ready to handle faults */ - pasid_state->invalid = false; - - /* - * Drop the reference to the mm_struct here. We rely on the - * mmu_notifier release call-back to inform us when the mm - * is going away. - */ - mmput(mm); - - return 0; - -out_clear_state: - clear_pasid_state(dev_state, pasid); - -out_unregister: - mmu_notifier_unregister(&pasid_state->mn, mm); - mmput(mm); - -out_free: - free_pasid_state(pasid_state); - -out: - put_device_state(dev_state); - - return ret; -} -EXPORT_SYMBOL(amd_iommu_bind_pasid); - -void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid) -{ - struct pasid_state *pasid_state; - struct device_state *dev_state; - u32 sbdf; - - might_sleep(); - - if (!amd_iommu_v2_supported()) - return; - - sbdf = get_pci_sbdf_id(pdev); - dev_state = get_device_state(sbdf); - if (dev_state == NULL) - return; - - if (pasid >= dev_state->max_pasids) - goto out; - - pasid_state = get_pasid_state(dev_state, pasid); - if (pasid_state == NULL) - goto out; - /* - * Drop reference taken here. We are safe because we still hold - * the reference taken in the amd_iommu_bind_pasid function. 
- */ - put_pasid_state(pasid_state); - - /* Clear the pasid state so that the pasid can be re-used */ - clear_pasid_state(dev_state, pasid_state->pasid); - - /* - * Call mmu_notifier_unregister to drop our reference - * to pasid_state->mm - */ - mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); - - put_pasid_state_wait(pasid_state); /* Reference taken in - amd_iommu_bind_pasid */ -out: - /* Drop reference taken in this function */ - put_device_state(dev_state); - - /* Drop reference taken in amd_iommu_bind_pasid */ - put_device_state(dev_state); -} -EXPORT_SYMBOL(amd_iommu_unbind_pasid); - -int amd_iommu_init_device(struct pci_dev *pdev, int pasids) -{ - struct device_state *dev_state; - struct iommu_group *group; - unsigned long flags; - int ret, tmp; - u32 sbdf; - - might_sleep(); - - /* - * When memory encryption is active the device is likely not in a - * direct-mapped domain. Forbid using IOMMUv2 functionality for now. - */ - if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) - return -ENODEV; - - if (!amd_iommu_v2_supported()) - return -ENODEV; - - if (pasids <= 0 || pasids > (PASID_MASK + 1)) - return -EINVAL; - - sbdf = get_pci_sbdf_id(pdev); - - dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL); - if (dev_state == NULL) - return -ENOMEM; - - spin_lock_init(&dev_state->lock); - init_waitqueue_head(&dev_state->wq); - dev_state->pdev = pdev; - dev_state->sbdf = sbdf; - - tmp = pasids; - for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9) - dev_state->pasid_levels += 1; - - atomic_set(&dev_state->count, 1); - dev_state->max_pasids = pasids; - - ret = -ENOMEM; - dev_state->states = (void *)get_zeroed_page(GFP_KERNEL); - if (dev_state->states == NULL) - goto out_free_dev_state; - - dev_state->domain = iommu_domain_alloc(&pci_bus_type); - if (dev_state->domain == NULL) - goto out_free_states; - - /* See iommu_is_default_domain() */ - dev_state->domain->type = IOMMU_DOMAIN_IDENTITY; - amd_iommu_domain_direct_map(dev_state->domain); - - ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids); - if (ret) - goto out_free_domain; - - group = iommu_group_get(&pdev->dev); - if (!group) { - ret = -EINVAL; - goto out_free_domain; - } - - ret = iommu_attach_group(dev_state->domain, group); - if (ret != 0) - goto out_drop_group; - - iommu_group_put(group); - - spin_lock_irqsave(&state_lock, flags); - - if (__get_device_state(sbdf) != NULL) { - spin_unlock_irqrestore(&state_lock, flags); - ret = -EBUSY; - goto out_free_domain; - } - - list_add_tail(&dev_state->list, &state_list); - - spin_unlock_irqrestore(&state_lock, flags); - - return 0; - -out_drop_group: - iommu_group_put(group); - -out_free_domain: - iommu_domain_free(dev_state->domain); - -out_free_states: - free_page((unsigned long)dev_state->states); - -out_free_dev_state: - kfree(dev_state); - - return ret; -} -EXPORT_SYMBOL(amd_iommu_init_device); - -void amd_iommu_free_device(struct pci_dev *pdev) -{ - struct device_state *dev_state; - unsigned long flags; - u32 sbdf; - - if (!amd_iommu_v2_supported()) - return; - - sbdf = get_pci_sbdf_id(pdev); - - spin_lock_irqsave(&state_lock, flags); - - dev_state = __get_device_state(sbdf); - if (dev_state == NULL) { - spin_unlock_irqrestore(&state_lock, flags); - return; - } - - list_del(&dev_state->list); - - spin_unlock_irqrestore(&state_lock, flags); - - put_device_state(dev_state); - free_device_state(dev_state); -} -EXPORT_SYMBOL(amd_iommu_free_device); - -int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, - amd_iommu_invalid_ppr_cb cb) -{ - struct device_state 
*dev_state; - unsigned long flags; - u32 sbdf; - int ret; - - if (!amd_iommu_v2_supported()) - return -ENODEV; - - sbdf = get_pci_sbdf_id(pdev); - - spin_lock_irqsave(&state_lock, flags); - - ret = -EINVAL; - dev_state = __get_device_state(sbdf); - if (dev_state == NULL) - goto out_unlock; - - dev_state->inv_ppr_cb = cb; - - ret = 0; - -out_unlock: - spin_unlock_irqrestore(&state_lock, flags); - - return ret; -} -EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb); - -int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, - amd_iommu_invalidate_ctx cb) -{ - struct device_state *dev_state; - unsigned long flags; - u32 sbdf; - int ret; - - if (!amd_iommu_v2_supported()) - return -ENODEV; - - sbdf = get_pci_sbdf_id(pdev); - - spin_lock_irqsave(&state_lock, flags); - - ret = -EINVAL; - dev_state = __get_device_state(sbdf); - if (dev_state == NULL) - goto out_unlock; - - dev_state->inv_ctx_cb = cb; - - ret = 0; - -out_unlock: - spin_unlock_irqrestore(&state_lock, flags); - - return ret; -} -EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb); - -static int __init amd_iommu_v2_init(void) -{ - int ret; - - if (!amd_iommu_v2_supported()) { - pr_info("AMD IOMMUv2 functionality not available on this system - This is not a bug.\n"); - /* - * Load anyway to provide the symbols to other modules - * which may use AMD IOMMUv2 optionally. - */ - return 0; - } - - ret = -ENOMEM; - iommu_wq = alloc_workqueue("amd_iommu_v2", WQ_MEM_RECLAIM, 0); - if (iommu_wq == NULL) - goto out; - - amd_iommu_register_ppr_notifier(&ppr_nb); - - pr_info("AMD IOMMUv2 loaded and initialized\n"); - - return 0; - -out: - return ret; -} - -static void __exit amd_iommu_v2_exit(void) -{ - struct device_state *dev_state, *next; - unsigned long flags; - LIST_HEAD(freelist); - - if (!amd_iommu_v2_supported()) - return; - - amd_iommu_unregister_ppr_notifier(&ppr_nb); - - flush_workqueue(iommu_wq); - - /* - * The loop below might call flush_workqueue(), so call - * destroy_workqueue() after it - */ - spin_lock_irqsave(&state_lock, flags); - - list_for_each_entry_safe(dev_state, next, &state_list, list) { - WARN_ON_ONCE(1); - - put_device_state(dev_state); - list_del(&dev_state->list); - list_add_tail(&dev_state->list, &freelist); - } - - spin_unlock_irqrestore(&state_lock, flags); - - /* - * Since free_device_state waits on the count to be zero, - * we need to free dev_state outside the spinlock. - */ - list_for_each_entry_safe(dev_state, next, &freelist, list) { - list_del(&dev_state->list); - free_device_state(dev_state); - } - - destroy_workqueue(iommu_wq); -} - -module_init(amd_iommu_v2_init); -module_exit(amd_iommu_v2_exit); diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 0b8927508427..ee05f4824bfa 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -196,7 +196,6 @@ struct apple_dart_hw { * @lock: lock for hardware operations involving this dart * @pgsize: pagesize supported by this DART * @supports_bypass: indicates if this DART supports bypass mode - * @force_bypass: force bypass mode due to pagesize mismatch? 
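amd_iommu_v2_exit() above moves every remaining device_state onto a local freelist while holding state_lock and frees the entries only after dropping it, since free_device_state() can block waiting for the refcount to drain. The same unlink-under-lock, free-outside-lock shape in portable C (the pthread naming is illustrative):

#include <pthread.h>
#include <stdlib.h>

struct node { struct node *next; };

static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *state_list;

static void teardown(void)
{
	struct node *freelist, *n;

	/* Unlink everything while the lock is held... */
	pthread_mutex_lock(&state_lock);
	freelist = state_list;
	state_list = NULL;
	pthread_mutex_unlock(&state_lock);

	/* ...then free outside it, where blocking is allowed. */
	while ((n = freelist)) {
		freelist = n->next;
		free(n);	/* the driver's free path may sleep here */
	}
}

Nothing can race with module exit to repopulate the list, so walking the local freelist without the lock is safe.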
* @sid2group: maps stream ids to iommu_groups * @iommu: iommu core device */ @@ -217,7 +216,6 @@ struct apple_dart { u32 pgsize; u32 num_streams; u32 supports_bypass : 1; - u32 force_bypass : 1; struct iommu_group *sid2group[DART_MAX_STREAMS]; struct iommu_device iommu; @@ -506,10 +504,11 @@ static void apple_dart_iotlb_sync(struct iommu_domain *domain, apple_dart_domain_flush_tlb(to_dart_domain(domain)); } -static void apple_dart_iotlb_sync_map(struct iommu_domain *domain, - unsigned long iova, size_t size) +static int apple_dart_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) { apple_dart_domain_flush_tlb(to_dart_domain(domain)); + return 0; } static phys_addr_t apple_dart_iova_to_phys(struct iommu_domain *domain, @@ -568,15 +567,17 @@ apple_dart_setup_translation(struct apple_dart_domain *domain, stream_map->dart->hw->invalidate_tlb(stream_map); } -static int apple_dart_finalize_domain(struct iommu_domain *domain, +static int apple_dart_finalize_domain(struct apple_dart_domain *dart_domain, struct apple_dart_master_cfg *cfg) { - struct apple_dart_domain *dart_domain = to_dart_domain(domain); struct apple_dart *dart = cfg->stream_maps[0].dart; struct io_pgtable_cfg pgtbl_cfg; int ret = 0; int i, j; + if (dart->pgsize > PAGE_SIZE) + return -EINVAL; + mutex_lock(&dart_domain->init_lock); if (dart_domain->finalized) @@ -597,17 +598,18 @@ static int apple_dart_finalize_domain(struct iommu_domain *domain, .iommu_dev = dart->dev, }; - dart_domain->pgtbl_ops = - alloc_io_pgtable_ops(dart->hw->fmt, &pgtbl_cfg, domain); + dart_domain->pgtbl_ops = alloc_io_pgtable_ops(dart->hw->fmt, &pgtbl_cfg, + &dart_domain->domain); if (!dart_domain->pgtbl_ops) { ret = -ENOMEM; goto done; } - domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; - domain->geometry.aperture_start = 0; - domain->geometry.aperture_end = (dma_addr_t)DMA_BIT_MASK(dart->ias); - domain->geometry.force_aperture = true; + dart_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; + dart_domain->domain.geometry.aperture_start = 0; + dart_domain->domain.geometry.aperture_end = + (dma_addr_t)DMA_BIT_MASK(dart->ias); + dart_domain->domain.geometry.force_aperture = true; dart_domain->finalized = true; @@ -651,47 +653,72 @@ static int apple_dart_domain_add_streams(struct apple_dart_domain *domain, true); } -static int apple_dart_attach_dev(struct iommu_domain *domain, - struct device *dev) +static int apple_dart_attach_dev_paging(struct iommu_domain *domain, + struct device *dev) { int ret, i; struct apple_dart_stream_map *stream_map; struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev); struct apple_dart_domain *dart_domain = to_dart_domain(domain); - if (cfg->stream_maps[0].dart->force_bypass && - domain->type != IOMMU_DOMAIN_IDENTITY) - return -EINVAL; - if (!cfg->stream_maps[0].dart->supports_bypass && - domain->type == IOMMU_DOMAIN_IDENTITY) - return -EINVAL; + ret = apple_dart_finalize_domain(dart_domain, cfg); + if (ret) + return ret; - ret = apple_dart_finalize_domain(domain, cfg); + ret = apple_dart_domain_add_streams(dart_domain, cfg); if (ret) return ret; - switch (domain->type) { - default: - ret = apple_dart_domain_add_streams(dart_domain, cfg); - if (ret) - return ret; + for_each_stream_map(i, cfg, stream_map) + apple_dart_setup_translation(dart_domain, stream_map); + return 0; +} - for_each_stream_map(i, cfg, stream_map) - apple_dart_setup_translation(dart_domain, stream_map); - break; - case IOMMU_DOMAIN_BLOCKED: - for_each_stream_map(i, cfg, stream_map) - 
apple_dart_hw_disable_dma(stream_map); - break; - case IOMMU_DOMAIN_IDENTITY: - for_each_stream_map(i, cfg, stream_map) - apple_dart_hw_enable_bypass(stream_map); - break; - } +static int apple_dart_attach_dev_identity(struct iommu_domain *domain, + struct device *dev) +{ + struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev); + struct apple_dart_stream_map *stream_map; + int i; - return ret; + if (!cfg->stream_maps[0].dart->supports_bypass) + return -EINVAL; + + for_each_stream_map(i, cfg, stream_map) + apple_dart_hw_enable_bypass(stream_map); + return 0; } +static const struct iommu_domain_ops apple_dart_identity_ops = { + .attach_dev = apple_dart_attach_dev_identity, +}; + +static struct iommu_domain apple_dart_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &apple_dart_identity_ops, +}; + +static int apple_dart_attach_dev_blocked(struct iommu_domain *domain, + struct device *dev) +{ + struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev); + struct apple_dart_stream_map *stream_map; + int i; + + for_each_stream_map(i, cfg, stream_map) + apple_dart_hw_disable_dma(stream_map); + return 0; +} + +static const struct iommu_domain_ops apple_dart_blocked_ops = { + .attach_dev = apple_dart_attach_dev_blocked, +}; + +static struct iommu_domain apple_dart_blocked_domain = { + .type = IOMMU_DOMAIN_BLOCKED, + .ops = &apple_dart_blocked_ops, +}; + static struct iommu_device *apple_dart_probe_device(struct device *dev) { struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev); @@ -717,24 +744,26 @@ static void apple_dart_release_device(struct device *dev) kfree(cfg); } -static struct iommu_domain *apple_dart_domain_alloc(unsigned int type) +static struct iommu_domain *apple_dart_domain_alloc_paging(struct device *dev) { struct apple_dart_domain *dart_domain; - if (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_UNMANAGED && - type != IOMMU_DOMAIN_IDENTITY && type != IOMMU_DOMAIN_BLOCKED) - return NULL; - dart_domain = kzalloc(sizeof(*dart_domain), GFP_KERNEL); if (!dart_domain) return NULL; mutex_init(&dart_domain->init_lock); - /* no need to allocate pgtbl_ops or do any other finalization steps */ - if (type == IOMMU_DOMAIN_IDENTITY || type == IOMMU_DOMAIN_BLOCKED) - dart_domain->finalized = true; + if (dev) { + struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev); + int ret; + ret = apple_dart_finalize_domain(dart_domain, cfg); + if (ret) { + kfree(dart_domain); + return ERR_PTR(ret); + } + } return &dart_domain->domain; } @@ -770,8 +799,6 @@ static int apple_dart_of_xlate(struct device *dev, struct of_phandle_args *args) if (cfg_dart) { if (cfg_dart->supports_bypass != dart->supports_bypass) return -EINVAL; - if (cfg_dart->force_bypass != dart->force_bypass) - return -EINVAL; if (cfg_dart->pgsize != dart->pgsize) return -EINVAL; } @@ -913,7 +940,7 @@ static int apple_dart_def_domain_type(struct device *dev) { struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev); - if (cfg->stream_maps[0].dart->force_bypass) + if (cfg->stream_maps[0].dart->pgsize > PAGE_SIZE) return IOMMU_DOMAIN_IDENTITY; if (!cfg->stream_maps[0].dart->supports_bypass) return IOMMU_DOMAIN_DMA; @@ -947,7 +974,9 @@ static void apple_dart_get_resv_regions(struct device *dev, } static const struct iommu_ops apple_dart_iommu_ops = { - .domain_alloc = apple_dart_domain_alloc, + .identity_domain = &apple_dart_identity_domain, + .blocked_domain = &apple_dart_blocked_domain, + .domain_alloc_paging = apple_dart_domain_alloc_paging, .probe_device = apple_dart_probe_device, .release_device = 
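The new apple_dart_identity_domain and apple_dart_blocked_domain objects follow the pattern this series spreads across drivers: identity and blocked domains carry no per-instance state, so a single static instance with its own small ops table replaces the type checks that used to live in domain_alloc and attach_dev. Stripped to its skeleton (struct and field names here are invented for illustration):

struct domain;

struct domain_ops {
	int (*attach)(struct domain *d, void *dev);
};

struct domain {
	int type;
	const struct domain_ops *ops;
};

static int identity_attach(struct domain *d, void *dev)
{
	/* point the hardware at bypass for 'dev'; nothing to allocate */
	return 0;
}

static const struct domain_ops identity_ops = {
	.attach = identity_attach,
};

/* One immutable instance, shared by every device that attaches. */
static struct domain identity_domain = {
	.type = 1,	/* stands in for IOMMU_DOMAIN_IDENTITY */
	.ops = &identity_ops,
};

Because the singleton is immutable, attaching one device needs no coordination with others, and domain_alloc_paging() can assume it only ever builds paging domains.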
apple_dart_release_device, .device_group = apple_dart_device_group, @@ -957,7 +986,7 @@ static const struct iommu_ops apple_dart_iommu_ops = { .pgsize_bitmap = -1UL, /* Restricted during dart probe */ .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { - .attach_dev = apple_dart_attach_dev, + .attach_dev = apple_dart_attach_dev_paging, .map_pages = apple_dart_map_pages, .unmap_pages = apple_dart_unmap_pages, .flush_iotlb_all = apple_dart_flush_iotlb_all, @@ -1111,8 +1140,6 @@ static int apple_dart_probe(struct platform_device *pdev) goto err_clk_disable; } - dart->force_bypass = dart->pgsize > PAGE_SIZE; - ret = apple_dart_hw_reset(dart); if (ret) goto err_clk_disable; @@ -1136,7 +1163,8 @@ static int apple_dart_probe(struct platform_device *pdev) dev_info( &pdev->dev, "DART [pagesize %x, %d streams, bypass support: %d, bypass forced: %d] initialized\n", - dart->pgsize, dart->num_streams, dart->supports_bypass, dart->force_bypass); + dart->pgsize, dart->num_streams, dart->supports_bypass, + dart->pgsize > PAGE_SIZE); return 0; err_sysfs_remove: diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 8a16cd3ef487..353248ab18e7 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -25,11 +25,9 @@ struct arm_smmu_mmu_notifier { #define mn_to_smmu(mn) container_of(mn, struct arm_smmu_mmu_notifier, mn) struct arm_smmu_bond { - struct iommu_sva sva; struct mm_struct *mm; struct arm_smmu_mmu_notifier *smmu_mn; struct list_head list; - refcount_t refs; }; #define sva_to_bond(handle) \ @@ -38,6 +36,25 @@ struct arm_smmu_bond { static DEFINE_MUTEX(sva_lock); /* + * Write the CD to the CD tables for all masters that this domain is attached + * to. Note that this is only used to update existing CD entries in the target + * CD table, for which it's assumed that arm_smmu_write_ctx_desc can't fail. + */ +static void arm_smmu_update_ctx_desc_devices(struct arm_smmu_domain *smmu_domain, + int ssid, + struct arm_smmu_ctx_desc *cd) +{ + struct arm_smmu_master *master; + unsigned long flags; + + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + list_for_each_entry(master, &smmu_domain->devices, domain_head) { + arm_smmu_write_ctx_desc(master, ssid, cd); + } + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); +} + +/* * Check if the CPU ASID is available on the SMMU side. If a private context * descriptor is using it, try to replace it. */ @@ -62,7 +79,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid) return cd; } - smmu_domain = container_of(cd, struct arm_smmu_domain, s1_cfg.cd); + smmu_domain = container_of(cd, struct arm_smmu_domain, cd); smmu = smmu_domain->smmu; ret = xa_alloc(&arm_smmu_asid_xa, &new_asid, cd, @@ -80,7 +97,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid) * be some overlap between use of both ASIDs, until we invalidate the * TLB. */ - arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, cd); + arm_smmu_update_ctx_desc_devices(smmu_domain, IOMMU_NO_PASID, cd); /* Invalidate TLB entries previously associated with that context */ arm_smmu_tlb_inv_asid(smmu, asid); @@ -247,7 +264,7 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) * DMA may still be running. Keep the cd valid to avoid C_BAD_CD events, * but disable translation. 
*/ - arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, &quiet_cd); + arm_smmu_update_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd); arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid); arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, 0, 0); @@ -273,8 +290,10 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain, struct mm_struct *mm) { int ret; + unsigned long flags; struct arm_smmu_ctx_desc *cd; struct arm_smmu_mmu_notifier *smmu_mn; + struct arm_smmu_master *master; list_for_each_entry(smmu_mn, &smmu_domain->mmu_notifiers, list) { if (smmu_mn->mn.mm == mm) { @@ -304,7 +323,16 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain, goto err_free_cd; } - ret = arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, cd); + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + list_for_each_entry(master, &smmu_domain->devices, domain_head) { + ret = arm_smmu_write_ctx_desc(master, mm->pasid, cd); + if (ret) { + list_for_each_entry_from_reverse(master, &smmu_domain->devices, domain_head) + arm_smmu_write_ctx_desc(master, mm->pasid, NULL); + break; + } + } + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); if (ret) goto err_put_notifier; @@ -329,7 +357,8 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn) return; list_del(&smmu_mn->list); - arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, NULL); + + arm_smmu_update_ctx_desc_devices(smmu_domain, mm->pasid, NULL); /* * If we went through clear(), we've already invalidated, and no @@ -345,8 +374,7 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn) arm_smmu_free_shared_cd(cd); } -static struct iommu_sva * -__arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) +static int __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) { int ret; struct arm_smmu_bond *bond; @@ -355,23 +383,13 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); if (!master || !master->sva_enabled) - return ERR_PTR(-ENODEV); - - /* If bind() was already called for this {dev, mm} pair, reuse it. 
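arm_smmu_mmu_notifier_get() above now writes the CD for every master attached to the domain and, if one write fails, walks back with list_for_each_entry_from_reverse() to clear the entries it already wrote, so the device list is never left half-programmed. The generic all-or-nothing loop, sketched over an array with stand-in apply()/undo() steps:

static int apply(int *slot) { *slot = 1; return 0; }	/* stand-in */
static void undo(int *slot) { *slot = 0; }		/* stand-in */

/* Apply to all elements, or to none: unwind in reverse on failure. */
static int apply_all(int *v, int n)
{
	int i, ret = 0;

	for (i = 0; i < n; i++) {
		ret = apply(&v[i]);
		if (ret) {
			while (--i >= 0)
				undo(&v[i]);
			break;
		}
	}
	return ret;
}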
*/ - list_for_each_entry(bond, &master->bonds, list) { - if (bond->mm == mm) { - refcount_inc(&bond->refs); - return &bond->sva; - } - } + return -ENODEV; bond = kzalloc(sizeof(*bond), GFP_KERNEL); if (!bond) - return ERR_PTR(-ENOMEM); + return -ENOMEM; bond->mm = mm; - bond->sva.dev = dev; - refcount_set(&bond->refs, 1); bond->smmu_mn = arm_smmu_mmu_notifier_get(smmu_domain, mm); if (IS_ERR(bond->smmu_mn)) { @@ -380,11 +398,11 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) } list_add(&bond->list, &master->bonds); - return &bond->sva; + return 0; err_free_bond: kfree(bond); - return ERR_PTR(ret); + return ret; } bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) @@ -550,7 +568,7 @@ void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, } } - if (!WARN_ON(!bond) && refcount_dec_and_test(&bond->refs)) { + if (!WARN_ON(!bond)) { list_del(&bond->list); arm_smmu_mmu_notifier_put(bond->smmu_mn); kfree(bond); @@ -562,13 +580,10 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t id) { int ret = 0; - struct iommu_sva *handle; struct mm_struct *mm = domain->mm; mutex_lock(&sva_lock); - handle = __arm_smmu_sva_bind(dev, mm); - if (IS_ERR(handle)) - ret = PTR_ERR(handle); + ret = __arm_smmu_sva_bind(dev, mm); mutex_unlock(&sva_lock); return ret; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index bd0a596f9863..7445454c2af2 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -971,14 +971,12 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid) arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); } -static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain, +static void arm_smmu_sync_cd(struct arm_smmu_master *master, int ssid, bool leaf) { size_t i; - unsigned long flags; - struct arm_smmu_master *master; struct arm_smmu_cmdq_batch cmds; - struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_device *smmu = master->smmu; struct arm_smmu_cmdq_ent cmd = { .opcode = CMDQ_OP_CFGI_CD, .cfgi = { @@ -988,15 +986,10 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain, }; cmds.num = 0; - - spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_for_each_entry(master, &smmu_domain->devices, domain_head) { - for (i = 0; i < master->num_streams; i++) { - cmd.cfgi.sid = master->streams[i].id; - arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); - } + for (i = 0; i < master->num_streams; i++) { + cmd.cfgi.sid = master->streams[i].id; + arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); } - spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); arm_smmu_cmdq_batch_submit(smmu, &cmds); } @@ -1026,34 +1019,33 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst, WRITE_ONCE(*dst, cpu_to_le64(val)); } -static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain, - u32 ssid) +static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid) { __le64 *l1ptr; unsigned int idx; struct arm_smmu_l1_ctx_desc *l1_desc; - struct arm_smmu_device *smmu = smmu_domain->smmu; - struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg; + struct arm_smmu_device *smmu = master->smmu; + struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; - if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR) - return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS; + if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) + return cd_table->cdtab + ssid * CTXDESC_CD_DWORDS; idx = ssid >> 
CTXDESC_SPLIT; - l1_desc = &cdcfg->l1_desc[idx]; + l1_desc = &cd_table->l1_desc[idx]; if (!l1_desc->l2ptr) { if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc)) return NULL; - l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS; + l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS; arm_smmu_write_cd_l1_desc(l1ptr, l1_desc); /* An invalid L1CD can be cached */ - arm_smmu_sync_cd(smmu_domain, ssid, false); + arm_smmu_sync_cd(master, ssid, false); } idx = ssid & (CTXDESC_L2_ENTRIES - 1); return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS; } -int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, +int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, struct arm_smmu_ctx_desc *cd) { /* @@ -1070,11 +1062,12 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, u64 val; bool cd_live; __le64 *cdptr; + struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; - if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax))) + if (WARN_ON(ssid >= (1 << cd_table->s1cdmax))) return -E2BIG; - cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid); + cdptr = arm_smmu_get_cd_ptr(master, ssid); if (!cdptr) return -ENOMEM; @@ -1098,11 +1091,11 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, cdptr[3] = cpu_to_le64(cd->mair); /* - * STE is live, and the SMMU might read dwords of this CD in any + * STE may be live, and the SMMU might read dwords of this CD in any * order. Ensure that it observes valid values before reading * V=1. */ - arm_smmu_sync_cd(smmu_domain, ssid, true); + arm_smmu_sync_cd(master, ssid, true); val = cd->tcr | #ifdef __BIG_ENDIAN @@ -1114,7 +1107,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) | CTXDESC_CD_0_V; - if (smmu_domain->stall_enabled) + if (cd_table->stall_enabled) val |= CTXDESC_CD_0_S; } @@ -1128,44 +1121,45 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, * without first making the structure invalid. 
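arm_smmu_get_cd_ptr() above resolves an SSID through a two-level table: the bits above CTXDESC_SPLIT pick an L1 descriptor whose leaf table is allocated on first use, and the low bits pick the CD within that leaf. A compact model of the lookup, assuming a split of 10 bits (so 1024-entry leaves); calloc() stands in for the DMA-coherent leaf allocation:

#include <stdint.h>
#include <stdlib.h>

#define SPLIT		10
#define L2_ENTRIES	(1u << SPLIT)

static uint64_t *cd_entry(uint64_t **l1, uint32_t ssid)
{
	uint32_t l1_idx = ssid >> SPLIT;		/* which leaf table */
	uint32_t l2_idx = ssid & (L2_ENTRIES - 1);	/* entry inside it */

	/* Leaves appear lazily; an empty L1 slot means "no CDs here yet". */
	if (!l1[l1_idx])
		l1[l1_idx] = calloc(L2_ENTRIES, sizeof(uint64_t));

	return l1[l1_idx] ? &l1[l1_idx][l2_idx] : NULL;
}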
*/ WRITE_ONCE(cdptr[0], cpu_to_le64(val)); - arm_smmu_sync_cd(smmu_domain, ssid, true); + arm_smmu_sync_cd(master, ssid, true); return 0; } -static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain) +static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master) { int ret; size_t l1size; size_t max_contexts; - struct arm_smmu_device *smmu = smmu_domain->smmu; - struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; - struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg; + struct arm_smmu_device *smmu = master->smmu; + struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; - max_contexts = 1 << cfg->s1cdmax; + cd_table->stall_enabled = master->stall_enabled; + cd_table->s1cdmax = master->ssid_bits; + max_contexts = 1 << cd_table->s1cdmax; if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) || max_contexts <= CTXDESC_L2_ENTRIES) { - cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR; - cdcfg->num_l1_ents = max_contexts; + cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR; + cd_table->num_l1_ents = max_contexts; l1size = max_contexts * (CTXDESC_CD_DWORDS << 3); } else { - cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2; - cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts, + cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2; + cd_table->num_l1_ents = DIV_ROUND_UP(max_contexts, CTXDESC_L2_ENTRIES); - cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents, - sizeof(*cdcfg->l1_desc), + cd_table->l1_desc = devm_kcalloc(smmu->dev, cd_table->num_l1_ents, + sizeof(*cd_table->l1_desc), GFP_KERNEL); - if (!cdcfg->l1_desc) + if (!cd_table->l1_desc) return -ENOMEM; - l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); + l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); } - cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma, + cd_table->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cd_table->cdtab_dma, GFP_KERNEL); - if (!cdcfg->cdtab) { + if (!cd_table->cdtab) { dev_warn(smmu->dev, "failed to allocate context descriptor\n"); ret = -ENOMEM; goto err_free_l1; @@ -1174,42 +1168,42 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain) return 0; err_free_l1: - if (cdcfg->l1_desc) { - devm_kfree(smmu->dev, cdcfg->l1_desc); - cdcfg->l1_desc = NULL; + if (cd_table->l1_desc) { + devm_kfree(smmu->dev, cd_table->l1_desc); + cd_table->l1_desc = NULL; } return ret; } -static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain) +static void arm_smmu_free_cd_tables(struct arm_smmu_master *master) { int i; size_t size, l1size; - struct arm_smmu_device *smmu = smmu_domain->smmu; - struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg; + struct arm_smmu_device *smmu = master->smmu; + struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; - if (cdcfg->l1_desc) { + if (cd_table->l1_desc) { size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3); - for (i = 0; i < cdcfg->num_l1_ents; i++) { - if (!cdcfg->l1_desc[i].l2ptr) + for (i = 0; i < cd_table->num_l1_ents; i++) { + if (!cd_table->l1_desc[i].l2ptr) continue; dmam_free_coherent(smmu->dev, size, - cdcfg->l1_desc[i].l2ptr, - cdcfg->l1_desc[i].l2ptr_dma); + cd_table->l1_desc[i].l2ptr, + cd_table->l1_desc[i].l2ptr_dma); } - devm_kfree(smmu->dev, cdcfg->l1_desc); - cdcfg->l1_desc = NULL; + devm_kfree(smmu->dev, cd_table->l1_desc); + cd_table->l1_desc = NULL; - l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); + l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); } else { - l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3); + l1size = cd_table->num_l1_ents * 
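The WRITE_ONCE of word 0 above is the tail of a publish protocol: words 1-3 of the CD are written first, a sync makes them observable, and only then does word 0 gain the valid bit, so the SMMU can never see V=1 paired with a stale payload. On a CPU-only structure the same guarantee is a release store; here is a C11 analogue (a device walker instead needs the explicit CFGI_CD syncs, as the driver issues):

#include <stdatomic.h>
#include <stdint.h>

#define VALID (1ull << 0)

struct desc {
	_Atomic uint64_t word0;		/* carries the valid bit */
	uint64_t word1, word2, word3;
};

static void publish(struct desc *d, uint64_t w0, uint64_t w1,
		    uint64_t w2, uint64_t w3)
{
	/* Payload first... */
	d->word1 = w1;
	d->word2 = w2;
	d->word3 = w3;
	/* ...then the valid bit, with release ordering, so a reader
	 * that observes VALID also observes words 1-3. */
	atomic_store_explicit(&d->word0, w0 | VALID, memory_order_release);
}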
(CTXDESC_CD_DWORDS << 3); } - dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma); - cdcfg->cdtab_dma = 0; - cdcfg->cdtab = NULL; + dmam_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma); + cd_table->cdtab_dma = 0; + cd_table->cdtab = NULL; } bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd) @@ -1276,7 +1270,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, u64 val = le64_to_cpu(dst[0]); bool ste_live = false; struct arm_smmu_device *smmu = NULL; - struct arm_smmu_s1_cfg *s1_cfg = NULL; + struct arm_smmu_ctx_desc_cfg *cd_table = NULL; struct arm_smmu_s2_cfg *s2_cfg = NULL; struct arm_smmu_domain *smmu_domain = NULL; struct arm_smmu_cmdq_ent prefetch_cmd = { @@ -1294,7 +1288,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, if (smmu_domain) { switch (smmu_domain->stage) { case ARM_SMMU_DOMAIN_S1: - s1_cfg = &smmu_domain->s1_cfg; + cd_table = &master->cd_table; break; case ARM_SMMU_DOMAIN_S2: case ARM_SMMU_DOMAIN_NESTED: @@ -1325,7 +1319,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, val = STRTAB_STE_0_V; /* Bypass/fault */ - if (!smmu_domain || !(s1_cfg || s2_cfg)) { + if (!smmu_domain || !(cd_table || s2_cfg)) { if (!smmu_domain && disable_bypass) val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT); else @@ -1344,7 +1338,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, return; } - if (s1_cfg) { + if (cd_table) { u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ? STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1; @@ -1360,10 +1354,10 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, !master->stall_enabled) dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); - val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | + val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | - FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) | - FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt); + FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax) | + FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt); } if (s2_cfg) { @@ -1869,7 +1863,7 @@ static void arm_smmu_tlb_inv_context(void *cookie) * careful, 007. */ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { - arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid); + arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid); } else { cmd.opcode = CMDQ_OP_TLBI_S12_VMALL; cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; @@ -1962,7 +1956,7 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size, if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ? 
CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA; - cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid; + cmd.tlbi.asid = smmu_domain->cd.asid; } else { cmd.opcode = CMDQ_OP_TLBI_S2_IPA; cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; @@ -2067,15 +2061,11 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) free_io_pgtable_ops(smmu_domain->pgtbl_ops); - /* Free the CD and ASID, if we allocated them */ + /* Free the ASID or VMID */ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { - struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; - /* Prevent SVA from touching the CD while we're freeing it */ mutex_lock(&arm_smmu_asid_lock); - if (cfg->cdcfg.cdtab) - arm_smmu_free_cd_tables(smmu_domain); - arm_smmu_free_asid(&cfg->cd); + arm_smmu_free_asid(&smmu_domain->cd); mutex_unlock(&arm_smmu_asid_lock); } else { struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; @@ -2087,66 +2077,43 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) } static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, - struct arm_smmu_master *master, struct io_pgtable_cfg *pgtbl_cfg) { int ret; u32 asid; struct arm_smmu_device *smmu = smmu_domain->smmu; - struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; + struct arm_smmu_ctx_desc *cd = &smmu_domain->cd; typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr; - refcount_set(&cfg->cd.refs, 1); + refcount_set(&cd->refs, 1); /* Prevent SVA from modifying the ASID until it is written to the CD */ mutex_lock(&arm_smmu_asid_lock); - ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd, + ret = xa_alloc(&arm_smmu_asid_xa, &asid, cd, XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); if (ret) goto out_unlock; - cfg->s1cdmax = master->ssid_bits; - - smmu_domain->stall_enabled = master->stall_enabled; - - ret = arm_smmu_alloc_cd_tables(smmu_domain); - if (ret) - goto out_free_asid; - - cfg->cd.asid = (u16)asid; - cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; - cfg->cd.tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) | + cd->asid = (u16)asid; + cd->ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; + cd->tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) | FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) | FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) | FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) | FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) | FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) | CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64; - cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair; - - /* - * Note that this will end up calling arm_smmu_sync_cd() before - * the master has been added to the devices list for this domain. - * This isn't an issue because the STE hasn't been installed yet. 
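The cd->tcr assembly above is plain FIELD_PREP() bitfield packing: shift each value to its field's position and mask it to the field's width. A freestanding approximation of the idiom (GENMASK/FIELD_PREP are modelled with a GCC/Clang builtin; the T0SZ [5:0] and TG0 [7:6] positions mirror TCR_EL1 and are shown only as examples):

#include <stdint.h>

#define GENMASK64(h, l)	(((~0ull) >> (63 - (h))) & ((~0ull) << (l)))
#define FIELD_PREP64(mask, val) \
	(((uint64_t)(val) << __builtin_ctzll(mask)) & (mask))

#define CD_TCR_T0SZ	GENMASK64(5, 0)
#define CD_TCR_TG0	GENMASK64(7, 6)

/* Pack translation-control fields into one register-format word. */
static uint64_t pack_tcr(unsigned int tsz, unsigned int tg)
{
	return FIELD_PREP64(CD_TCR_T0SZ, tsz) |
	       FIELD_PREP64(CD_TCR_TG0, tg);
}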
- */ - ret = arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, &cfg->cd); - if (ret) - goto out_free_cd_tables; + cd->mair = pgtbl_cfg->arm_lpae_s1_cfg.mair; mutex_unlock(&arm_smmu_asid_lock); return 0; -out_free_cd_tables: - arm_smmu_free_cd_tables(smmu_domain); -out_free_asid: - arm_smmu_free_asid(&cfg->cd); out_unlock: mutex_unlock(&arm_smmu_asid_lock); return ret; } static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, - struct arm_smmu_master *master, struct io_pgtable_cfg *pgtbl_cfg) { int vmid; @@ -2173,8 +2140,7 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, return 0; } -static int arm_smmu_domain_finalise(struct iommu_domain *domain, - struct arm_smmu_master *master) +static int arm_smmu_domain_finalise(struct iommu_domain *domain) { int ret; unsigned long ias, oas; @@ -2182,7 +2148,6 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain, struct io_pgtable_cfg pgtbl_cfg; struct io_pgtable_ops *pgtbl_ops; int (*finalise_stage_fn)(struct arm_smmu_domain *, - struct arm_smmu_master *, struct io_pgtable_cfg *); struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; @@ -2234,7 +2199,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain, domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1; domain->geometry.force_aperture = true; - ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg); + ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg); if (ret < 0) { free_io_pgtable_ops(pgtbl_ops); return ret; @@ -2403,6 +2368,14 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master) master->domain = NULL; master->ats_enabled = false; arm_smmu_install_ste_for_dev(master); + /* + * Clearing the CD entry isn't strictly required to detach the domain + * since the table is uninstalled anyway, but it helps avoid confusion + * in the call to arm_smmu_write_ctx_desc on the next attach (which + * expects the entry to be empty). 
+ */ + if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && master->cd_table.cdtab) + arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL); } static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) @@ -2436,23 +2409,15 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) if (!smmu_domain->smmu) { smmu_domain->smmu = smmu; - ret = arm_smmu_domain_finalise(domain, master); - if (ret) { + ret = arm_smmu_domain_finalise(domain); + if (ret) smmu_domain->smmu = NULL; - goto out_unlock; - } - } else if (smmu_domain->smmu != smmu) { - ret = -EINVAL; - goto out_unlock; - } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && - master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) { + } else if (smmu_domain->smmu != smmu) ret = -EINVAL; - goto out_unlock; - } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && - smmu_domain->stall_enabled != master->stall_enabled) { - ret = -EINVAL; - goto out_unlock; - } + + mutex_unlock(&smmu_domain->init_mutex); + if (ret) + return ret; master->domain = smmu_domain; @@ -2466,16 +2431,42 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS) master->ats_enabled = arm_smmu_ats_supported(master); - arm_smmu_install_ste_for_dev(master); - spin_lock_irqsave(&smmu_domain->devices_lock, flags); list_add(&master->domain_head, &smmu_domain->devices); spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { + if (!master->cd_table.cdtab) { + ret = arm_smmu_alloc_cd_tables(master); + if (ret) { + master->domain = NULL; + goto out_list_del; + } + } + + /* + * Prevent SVA from concurrently modifying the CD or writing to + * the CD entry + */ + mutex_lock(&arm_smmu_asid_lock); + ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd); + mutex_unlock(&arm_smmu_asid_lock); + if (ret) { + master->domain = NULL; + goto out_list_del; + } + } + + arm_smmu_install_ste_for_dev(master); + arm_smmu_enable_ats(master); + return 0; + +out_list_del: + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + list_del(&master->domain_head); + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); -out_unlock: - mutex_unlock(&smmu_domain->init_mutex); return ret; } @@ -2720,6 +2711,8 @@ static void arm_smmu_release_device(struct device *dev) arm_smmu_detach_dev(master); arm_smmu_disable_pasid(master); arm_smmu_remove_master(master); + if (master->cd_table.cdtab) + arm_smmu_free_cd_tables(master); kfree(master); } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 9915850dd4db..961205ba86d2 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -595,13 +595,11 @@ struct arm_smmu_ctx_desc_cfg { dma_addr_t cdtab_dma; struct arm_smmu_l1_ctx_desc *l1_desc; unsigned int num_l1_ents; -}; - -struct arm_smmu_s1_cfg { - struct arm_smmu_ctx_desc_cfg cdcfg; - struct arm_smmu_ctx_desc cd; u8 s1fmt; + /* log2 of the maximum number of CDs supported by this table */ u8 s1cdmax; + /* Whether CD entries in this table have the stall bit set. 
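The reworked attach path above gains an out_list_del label: if CD-table allocation or the CD write fails, the master is removed from the domain's device list again before returning. That is the usual kernel goto-unwind ladder, in miniature (step/undo functions are stand-ins for list_add, arm_smmu_alloc_cd_tables and friends):

static int step_a(void) { return 0; }	/* e.g. add to the device list */
static void undo_a(void) { }		/* e.g. remove from it again */
static int step_b(void) { return 0; }	/* e.g. write the CD entry */

static int attach(void)
{
	int ret;

	ret = step_a();
	if (ret)
		return ret;

	ret = step_b();
	if (ret)
		goto out_undo_a;	/* unwind exactly what succeeded */

	return 0;

out_undo_a:
	undo_a();
	return ret;
}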
*/ + u8 stall_enabled:1; }; struct arm_smmu_s2_cfg { @@ -697,6 +695,8 @@ struct arm_smmu_master { struct arm_smmu_domain *domain; struct list_head domain_head; struct arm_smmu_stream *streams; + /* Locked by the iommu core using the group mutex */ + struct arm_smmu_ctx_desc_cfg cd_table; unsigned int num_streams; bool ats_enabled; bool stall_enabled; @@ -719,13 +719,12 @@ struct arm_smmu_domain { struct mutex init_mutex; /* Protects smmu pointer */ struct io_pgtable_ops *pgtbl_ops; - bool stall_enabled; atomic_t nr_ats_masters; enum arm_smmu_domain_stage stage; union { - struct arm_smmu_s1_cfg s1_cfg; - struct arm_smmu_s2_cfg s2_cfg; + struct arm_smmu_ctx_desc cd; + struct arm_smmu_s2_cfg s2_cfg; }; struct iommu_domain domain; @@ -745,7 +744,7 @@ extern struct xarray arm_smmu_asid_xa; extern struct mutex arm_smmu_asid_lock; extern struct arm_smmu_ctx_desc quiet_cd; -int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, +int arm_smmu_write_ctx_desc(struct arm_smmu_master *smmu_master, int ssid, struct arm_smmu_ctx_desc *cd); void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid); void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 7f52ac67495f..549ae4dba3a6 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -251,6 +251,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,sc7280-mss-pil" }, { .compatible = "qcom,sc8180x-mdss" }, { .compatible = "qcom,sc8280xp-mdss" }, + { .compatible = "qcom,sdm670-mdss" }, { .compatible = "qcom,sdm845-mdss" }, { .compatible = "qcom,sdm845-mss-pil" }, { .compatible = "qcom,sm6350-mdss" }, @@ -532,6 +533,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sm6375-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,sm6375-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm7150-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,sm8150-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sm8350-smmu-500", .data = &qcom_smmu_500_impl0_data }, diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index 775a3cbaff4e..97b2122032b2 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -332,12 +332,10 @@ out_unlock: return ret; } -static struct iommu_domain *qcom_iommu_domain_alloc(unsigned type) +static struct iommu_domain *qcom_iommu_domain_alloc_paging(struct device *dev) { struct qcom_iommu_domain *qcom_domain; - if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) - return NULL; /* * Allocate the domain and initialise some of its data structures. 
* We can't really do anything meaningful until we've added a @@ -400,6 +398,44 @@ static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev return 0; } +static int qcom_iommu_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct qcom_iommu_domain *qcom_domain; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + struct qcom_iommu_dev *qcom_iommu = to_iommu(dev); + unsigned int i; + + if (domain == identity_domain || !domain) + return 0; + + qcom_domain = to_qcom_iommu_domain(domain); + if (WARN_ON(!qcom_domain->iommu)) + return -EINVAL; + + pm_runtime_get_sync(qcom_iommu->dev); + for (i = 0; i < fwspec->num_ids; i++) { + struct qcom_iommu_ctx *ctx = to_ctx(qcom_domain, fwspec->ids[i]); + + /* Disable the context bank: */ + iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0); + + ctx->domain = NULL; + } + pm_runtime_put_sync(qcom_iommu->dev); + return 0; +} + +static struct iommu_domain_ops qcom_iommu_identity_ops = { + .attach_dev = qcom_iommu_identity_attach, +}; + +static struct iommu_domain qcom_iommu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &qcom_iommu_identity_ops, +}; + static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -565,8 +601,9 @@ static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) } static const struct iommu_ops qcom_iommu_ops = { + .identity_domain = &qcom_iommu_identity_domain, .capable = qcom_iommu_capable, - .domain_alloc = qcom_iommu_domain_alloc, + .domain_alloc_paging = qcom_iommu_domain_alloc_paging, .probe_device = qcom_iommu_probe_device, .device_group = generic_device_group, .of_xlate = qcom_iommu_of_xlate, diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 4b1a88f514c9..85163a83df2f 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -43,14 +43,28 @@ enum iommu_dma_cookie_type { IOMMU_DMA_MSI_COOKIE, }; +enum iommu_dma_queue_type { + IOMMU_DMA_OPTS_PER_CPU_QUEUE, + IOMMU_DMA_OPTS_SINGLE_QUEUE, +}; + +struct iommu_dma_options { + enum iommu_dma_queue_type qt; + size_t fq_size; + unsigned int fq_timeout; +}; + struct iommu_dma_cookie { enum iommu_dma_cookie_type type; union { /* Full allocator for IOMMU_DMA_IOVA_COOKIE */ struct { struct iova_domain iovad; - - struct iova_fq __percpu *fq; /* Flush queue */ + /* Flush queue */ + union { + struct iova_fq *single_fq; + struct iova_fq __percpu *percpu_fq; + }; /* Number of TLB flushes that have been started */ atomic64_t fq_flush_start_cnt; /* Number of TLB flushes that have been finished */ @@ -67,6 +81,8 @@ struct iommu_dma_cookie { /* Domain for flush queue callback; NULL if flush queue not in use */ struct iommu_domain *fq_domain; + /* Options for dma-iommu use */ + struct iommu_dma_options options; struct mutex mutex; }; @@ -84,10 +100,12 @@ static int __init iommu_dma_forcedac_setup(char *str) early_param("iommu.forcedac", iommu_dma_forcedac_setup); /* Number of entries per flush queue */ -#define IOVA_FQ_SIZE 256 +#define IOVA_DEFAULT_FQ_SIZE 256 +#define IOVA_SINGLE_FQ_SIZE 32768 /* Timeout (in ms) after which entries are flushed from the queue */ -#define IOVA_FQ_TIMEOUT 10 +#define IOVA_DEFAULT_FQ_TIMEOUT 10 +#define IOVA_SINGLE_FQ_TIMEOUT 1000 /* Flush queue entry for deferred flushing */ struct iova_fq_entry { @@ -99,18 +117,19 @@ struct iova_fq_entry { /* Per-CPU flush queue structure */ 
struct iova_fq { - struct iova_fq_entry entries[IOVA_FQ_SIZE]; - unsigned int head, tail; spinlock_t lock; + unsigned int head, tail; + unsigned int mod_mask; + struct iova_fq_entry entries[]; }; #define fq_ring_for_each(i, fq) \ - for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE) + for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) & (fq)->mod_mask) static inline bool fq_full(struct iova_fq *fq) { assert_spin_locked(&fq->lock); - return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head); + return (((fq->tail + 1) & fq->mod_mask) == fq->head); } static inline unsigned int fq_ring_add(struct iova_fq *fq) @@ -119,12 +138,12 @@ static inline unsigned int fq_ring_add(struct iova_fq *fq) assert_spin_locked(&fq->lock); - fq->tail = (idx + 1) % IOVA_FQ_SIZE; + fq->tail = (idx + 1) & fq->mod_mask; return idx; } -static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq) +static void fq_ring_free_locked(struct iommu_dma_cookie *cookie, struct iova_fq *fq) { u64 counter = atomic64_read(&cookie->fq_flush_finish_cnt); unsigned int idx; @@ -141,10 +160,19 @@ static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq) fq->entries[idx].iova_pfn, fq->entries[idx].pages); - fq->head = (fq->head + 1) % IOVA_FQ_SIZE; + fq->head = (fq->head + 1) & fq->mod_mask; } } +static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq) +{ + unsigned long flags; + + spin_lock_irqsave(&fq->lock, flags); + fq_ring_free_locked(cookie, fq); + spin_unlock_irqrestore(&fq->lock, flags); +} + static void fq_flush_iotlb(struct iommu_dma_cookie *cookie) { atomic64_inc(&cookie->fq_flush_start_cnt); @@ -160,14 +188,11 @@ static void fq_flush_timeout(struct timer_list *t) atomic_set(&cookie->fq_timer_on, 0); fq_flush_iotlb(cookie); - for_each_possible_cpu(cpu) { - unsigned long flags; - struct iova_fq *fq; - - fq = per_cpu_ptr(cookie->fq, cpu); - spin_lock_irqsave(&fq->lock, flags); - fq_ring_free(cookie, fq); - spin_unlock_irqrestore(&fq->lock, flags); + if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) { + fq_ring_free(cookie, cookie->single_fq); + } else { + for_each_possible_cpu(cpu) + fq_ring_free(cookie, per_cpu_ptr(cookie->percpu_fq, cpu)); } } @@ -188,7 +213,11 @@ static void queue_iova(struct iommu_dma_cookie *cookie, */ smp_mb(); - fq = raw_cpu_ptr(cookie->fq); + if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) + fq = cookie->single_fq; + else + fq = raw_cpu_ptr(cookie->percpu_fq); + spin_lock_irqsave(&fq->lock, flags); /* @@ -196,11 +225,11 @@ static void queue_iova(struct iommu_dma_cookie *cookie, * flushed out on another CPU. This makes the fq_full() check below less * likely to be true. 
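The fq_ring_free() split above follows the kernel's *_locked convention: fq_ring_free_locked() expects fq->lock to be held (queue_iova() already owns it), while fq_ring_free() is the self-locking wrapper used from the timer path. The shape of such a pair, with a pthread mutex standing in for the spinlock:

#include <pthread.h>

static pthread_mutex_t fq_lock = PTHREAD_MUTEX_INITIALIZER;
static int pending;

/* Caller must hold fq_lock. */
static void reap_locked(void)
{
	pending = 0;
}

/* Wrapper for callers that do not hold the lock. */
static void reap(void)
{
	pthread_mutex_lock(&fq_lock);
	reap_locked();
	pthread_mutex_unlock(&fq_lock);
}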
*/ - fq_ring_free(cookie, fq); + fq_ring_free_locked(cookie, fq); if (fq_full(fq)) { fq_flush_iotlb(cookie); - fq_ring_free(cookie, fq); + fq_ring_free_locked(cookie, fq); } idx = fq_ring_add(fq); @@ -216,34 +245,95 @@ static void queue_iova(struct iommu_dma_cookie *cookie, if (!atomic_read(&cookie->fq_timer_on) && !atomic_xchg(&cookie->fq_timer_on, 1)) mod_timer(&cookie->fq_timer, - jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); + jiffies + msecs_to_jiffies(cookie->options.fq_timeout)); } -static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie) +static void iommu_dma_free_fq_single(struct iova_fq *fq) { - int cpu, idx; + int idx; - if (!cookie->fq) - return; + fq_ring_for_each(idx, fq) + put_pages_list(&fq->entries[idx].freelist); + vfree(fq); +} + +static void iommu_dma_free_fq_percpu(struct iova_fq __percpu *percpu_fq) +{ + int cpu, idx; - del_timer_sync(&cookie->fq_timer); /* The IOVAs will be torn down separately, so just free our queued pages */ for_each_possible_cpu(cpu) { - struct iova_fq *fq = per_cpu_ptr(cookie->fq, cpu); + struct iova_fq *fq = per_cpu_ptr(percpu_fq, cpu); fq_ring_for_each(idx, fq) put_pages_list(&fq->entries[idx].freelist); } - free_percpu(cookie->fq); + free_percpu(percpu_fq); +} + +static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie) +{ + if (!cookie->fq_domain) + return; + + del_timer_sync(&cookie->fq_timer); + if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) + iommu_dma_free_fq_single(cookie->single_fq); + else + iommu_dma_free_fq_percpu(cookie->percpu_fq); +} + +static void iommu_dma_init_one_fq(struct iova_fq *fq, size_t fq_size) +{ + int i; + + fq->head = 0; + fq->tail = 0; + fq->mod_mask = fq_size - 1; + + spin_lock_init(&fq->lock); + + for (i = 0; i < fq_size; i++) + INIT_LIST_HEAD(&fq->entries[i].freelist); +} + +static int iommu_dma_init_fq_single(struct iommu_dma_cookie *cookie) +{ + size_t fq_size = cookie->options.fq_size; + struct iova_fq *queue; + + queue = vmalloc(struct_size(queue, entries, fq_size)); + if (!queue) + return -ENOMEM; + iommu_dma_init_one_fq(queue, fq_size); + cookie->single_fq = queue; + + return 0; +} + +static int iommu_dma_init_fq_percpu(struct iommu_dma_cookie *cookie) +{ + size_t fq_size = cookie->options.fq_size; + struct iova_fq __percpu *queue; + int cpu; + + queue = __alloc_percpu(struct_size(queue, entries, fq_size), + __alignof__(*queue)); + if (!queue) + return -ENOMEM; + + for_each_possible_cpu(cpu) + iommu_dma_init_one_fq(per_cpu_ptr(queue, cpu), fq_size); + cookie->percpu_fq = queue; + return 0; } /* sysfs updates are serialised by the mutex of the group owning @domain */ int iommu_dma_init_fq(struct iommu_domain *domain) { struct iommu_dma_cookie *cookie = domain->iova_cookie; - struct iova_fq __percpu *queue; - int i, cpu; + int rc; if (cookie->fq_domain) return 0; @@ -251,26 +341,16 @@ int iommu_dma_init_fq(struct iommu_domain *domain) atomic64_set(&cookie->fq_flush_start_cnt, 0); atomic64_set(&cookie->fq_flush_finish_cnt, 0); - queue = alloc_percpu(struct iova_fq); - if (!queue) { + if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) + rc = iommu_dma_init_fq_single(cookie); + else + rc = iommu_dma_init_fq_percpu(cookie); + + if (rc) { pr_warn("iova flush queue initialization failed\n"); return -ENOMEM; } - for_each_possible_cpu(cpu) { - struct iova_fq *fq = per_cpu_ptr(queue, cpu); - - fq->head = 0; - fq->tail = 0; - - spin_lock_init(&fq->lock); - - for (i = 0; i < IOVA_FQ_SIZE; i++) - INIT_LIST_HEAD(&fq->entries[i].freelist); - } - - cookie->fq = queue; - 
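Because the flush-queue size is now variable (IOVA_DEFAULT_FQ_SIZE or IOVA_SINGLE_FQ_SIZE), the ring arithmetic above switched from '% IOVA_FQ_SIZE' to '& fq->mod_mask', which is only valid because both sizes are powers of two. A self-contained model of such a ring (an int payload replaces iova_fq_entry):

#include <stdlib.h>

struct ring {
	unsigned int head, tail, mod_mask;	/* mod_mask = size - 1 */
	int entries[];				/* flexible array, as in iova_fq */
};

static struct ring *ring_alloc(unsigned int size)	/* size: power of two */
{
	struct ring *r = calloc(1, sizeof(*r) + size * sizeof(int));

	if (r)
		r->mod_mask = size - 1;
	return r;
}

static int ring_full(const struct ring *r)
{
	return ((r->tail + 1) & r->mod_mask) == r->head;
}

static int ring_push(struct ring *r, int v)
{
	if (ring_full(r))
		return -1;	/* caller flushes, then retries */
	r->entries[r->tail] = v;
	r->tail = (r->tail + 1) & r->mod_mask;
	return 0;
}

One slot is deliberately left unused (full means tail + 1 == head), so an empty ring (head == tail) stays distinguishable from a full one.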
timer_setup(&cookie->fq_timer, fq_flush_timeout, 0); atomic_set(&cookie->fq_timer_on, 0); /* @@ -555,6 +635,28 @@ static bool dev_use_sg_swiotlb(struct device *dev, struct scatterlist *sg, } /** + * iommu_dma_init_options - Initialize dma-iommu options + * @options: The options to be initialized + * @dev: Device the options are set for + * + * This allows tuning dma-iommu specific to device properties + */ +static void iommu_dma_init_options(struct iommu_dma_options *options, + struct device *dev) +{ + /* Shadowing IOTLB flushes do better with a single large queue */ + if (dev->iommu->shadow_on_flush) { + options->qt = IOMMU_DMA_OPTS_SINGLE_QUEUE; + options->fq_timeout = IOVA_SINGLE_FQ_TIMEOUT; + options->fq_size = IOVA_SINGLE_FQ_SIZE; + } else { + options->qt = IOMMU_DMA_OPTS_PER_CPU_QUEUE; + options->fq_size = IOVA_DEFAULT_FQ_SIZE; + options->fq_timeout = IOVA_DEFAULT_FQ_TIMEOUT; + } +} + +/** * iommu_dma_init_domain - Initialise a DMA mapping domain * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() * @base: IOVA at which the mappable address space starts @@ -614,6 +716,8 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, if (ret) goto done_unlock; + iommu_dma_init_options(&cookie->options, dev); + /* If the FQ fails we can simply fall back to strict mode */ if (domain->type == IOMMU_DOMAIN_DMA_FQ && (!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) || iommu_dma_init_fq(domain))) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index c275fe71c4db..2c6e9094f1e9 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -24,6 +24,7 @@ typedef u32 sysmmu_iova_t; typedef u32 sysmmu_pte_t; +static struct iommu_domain exynos_identity_domain; /* We do not consider super section mapping (16MB) */ #define SECT_ORDER 20 @@ -829,7 +830,7 @@ static int __maybe_unused exynos_sysmmu_suspend(struct device *dev) struct exynos_iommu_owner *owner = dev_iommu_priv_get(master); mutex_lock(&owner->rpm_lock); - if (data->domain) { + if (&data->domain->domain != &exynos_identity_domain) { dev_dbg(data->sysmmu, "saving state\n"); __sysmmu_disable(data); } @@ -847,7 +848,7 @@ static int __maybe_unused exynos_sysmmu_resume(struct device *dev) struct exynos_iommu_owner *owner = dev_iommu_priv_get(master); mutex_lock(&owner->rpm_lock); - if (data->domain) { + if (&data->domain->domain != &exynos_identity_domain) { dev_dbg(data->sysmmu, "restoring state\n"); __sysmmu_enable(data); } @@ -886,7 +887,7 @@ static inline void exynos_iommu_set_pte(sysmmu_pte_t *ent, sysmmu_pte_t val) DMA_TO_DEVICE); } -static struct iommu_domain *exynos_iommu_domain_alloc(unsigned type) +static struct iommu_domain *exynos_iommu_domain_alloc_paging(struct device *dev) { struct exynos_iommu_domain *domain; dma_addr_t handle; @@ -895,9 +896,6 @@ static struct iommu_domain *exynos_iommu_domain_alloc(unsigned type) /* Check if correct PTE offsets are initialized */ BUG_ON(PG_ENT_SHIFT < 0 || !dma_dev); - if (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) return NULL; @@ -980,17 +978,20 @@ static void exynos_iommu_domain_free(struct iommu_domain *iommu_domain) kfree(domain); } -static void exynos_iommu_detach_device(struct iommu_domain *iommu_domain, - struct device *dev) +static int exynos_iommu_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) { - struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain); struct 
exynos_iommu_owner *owner = dev_iommu_priv_get(dev); - phys_addr_t pagetable = virt_to_phys(domain->pgtable); + struct exynos_iommu_domain *domain; + phys_addr_t pagetable; struct sysmmu_drvdata *data, *next; unsigned long flags; - if (!has_sysmmu(dev) || owner->domain != iommu_domain) - return; + if (owner->domain == identity_domain) + return 0; + + domain = to_exynos_domain(owner->domain); + pagetable = virt_to_phys(domain->pgtable); mutex_lock(&owner->rpm_lock); @@ -1009,15 +1010,25 @@ static void exynos_iommu_detach_device(struct iommu_domain *iommu_domain, list_del_init(&data->domain_node); spin_unlock(&data->lock); } - owner->domain = NULL; + owner->domain = identity_domain; spin_unlock_irqrestore(&domain->lock, flags); mutex_unlock(&owner->rpm_lock); - dev_dbg(dev, "%s: Detached IOMMU with pgtable %pa\n", __func__, - &pagetable); + dev_dbg(dev, "%s: Restored IOMMU to IDENTITY from pgtable %pa\n", + __func__, &pagetable); + return 0; } +static struct iommu_domain_ops exynos_identity_ops = { + .attach_dev = exynos_iommu_identity_attach, +}; + +static struct iommu_domain exynos_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &exynos_identity_ops, +}; + static int exynos_iommu_attach_device(struct iommu_domain *iommu_domain, struct device *dev) { @@ -1026,12 +1037,11 @@ static int exynos_iommu_attach_device(struct iommu_domain *iommu_domain, struct sysmmu_drvdata *data; phys_addr_t pagetable = virt_to_phys(domain->pgtable); unsigned long flags; + int err; - if (!has_sysmmu(dev)) - return -ENODEV; - - if (owner->domain) - exynos_iommu_detach_device(owner->domain, dev); + err = exynos_iommu_identity_attach(&exynos_identity_domain, dev); + if (err) + return err; mutex_lock(&owner->rpm_lock); @@ -1219,7 +1229,7 @@ static int lv2set_page(sysmmu_pte_t *pent, phys_addr_t paddr, size_t size, */ static int exynos_iommu_map(struct iommu_domain *iommu_domain, unsigned long l_iova, phys_addr_t paddr, size_t size, - int prot, gfp_t gfp) + size_t count, int prot, gfp_t gfp, size_t *mapped) { struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain); sysmmu_pte_t *entry; @@ -1253,6 +1263,8 @@ static int exynos_iommu_map(struct iommu_domain *iommu_domain, if (ret) pr_err("%s: Failed(%d) to map %#zx bytes @ %#x\n", __func__, ret, size, iova); + else + *mapped = size; spin_unlock_irqrestore(&domain->pgtablelock, flags); @@ -1274,7 +1286,7 @@ static void exynos_iommu_tlb_invalidate_entry(struct exynos_iommu_domain *domain } static size_t exynos_iommu_unmap(struct iommu_domain *iommu_domain, - unsigned long l_iova, size_t size, + unsigned long l_iova, size_t size, size_t count, struct iommu_iotlb_gather *gather) { struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain); @@ -1407,26 +1419,12 @@ static struct iommu_device *exynos_iommu_probe_device(struct device *dev) return &data->iommu; } -static void exynos_iommu_set_platform_dma(struct device *dev) -{ - struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev); - - if (owner->domain) { - struct iommu_group *group = iommu_group_get(dev); - - if (group) { - exynos_iommu_detach_device(owner->domain, dev); - iommu_group_put(group); - } - } -} - static void exynos_iommu_release_device(struct device *dev) { struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev); struct sysmmu_drvdata *data; - exynos_iommu_set_platform_dma(dev); + WARN_ON(exynos_iommu_identity_attach(&exynos_identity_domain, dev)); list_for_each_entry(data, &owner->controllers, owner_node) device_link_del(data->link); @@ -1457,6 +1455,7 @@ static int 
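The exynos conversion above is the template most of this series follows: the old NULL-domain/"detach" convention is replaced by a statically defined IOMMU_DOMAIN_IDENTITY domain whose only op is attach_dev, and per-device state points at it instead of NULL. A condensed sketch of the shape, using a hypothetical "mydrv" driver (error handling elided):

    #include <linux/iommu.h>

    static int mydrv_identity_attach(struct iommu_domain *identity_domain,
                                     struct device *dev)
    {
            /* tear down any paging-domain state; must always succeed */
            return 0;
    }

    static struct iommu_domain_ops mydrv_identity_ops = {
            .attach_dev = mydrv_identity_attach,
    };

    static struct iommu_domain mydrv_identity_domain = {
            .type = IOMMU_DOMAIN_IDENTITY,
            .ops = &mydrv_identity_ops,
    };

Wiring '.identity_domain = &mydrv_identity_domain' into the driver's iommu_ops then lets the core treat identity as a first-class default domain rather than a driver-private NULL state.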
exynos_iommu_of_xlate(struct device *dev, INIT_LIST_HEAD(&owner->controllers); mutex_init(&owner->rpm_lock); + owner->domain = &exynos_identity_domain; dev_iommu_priv_set(dev, owner); } @@ -1471,19 +1470,17 @@ static int exynos_iommu_of_xlate(struct device *dev, } static const struct iommu_ops exynos_iommu_ops = { - .domain_alloc = exynos_iommu_domain_alloc, + .identity_domain = &exynos_identity_domain, + .domain_alloc_paging = exynos_iommu_domain_alloc_paging, .device_group = generic_device_group, -#ifdef CONFIG_ARM - .set_platform_dma_ops = exynos_iommu_set_platform_dma, -#endif .probe_device = exynos_iommu_probe_device, .release_device = exynos_iommu_release_device, .pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE, .of_xlate = exynos_iommu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = exynos_iommu_attach_device, - .map = exynos_iommu_map, - .unmap = exynos_iommu_unmap, + .map_pages = exynos_iommu_map, + .unmap_pages = exynos_iommu_unmap, .iova_to_phys = exynos_iommu_iova_to_phys, .free = exynos_iommu_domain_free, } diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 4ac0e247ec2b..e9d2bff4659b 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -196,6 +196,13 @@ static struct iommu_domain *fsl_pamu_domain_alloc(unsigned type) { struct fsl_dma_domain *dma_domain; + /* + * FIXME: This isn't creating an unmanaged domain: since the + * default_domain_ops do not have any map/unmap functions, it doesn't meet + * the requirements for __IOMMU_DOMAIN_PAGING. Its only purpose seems to be + * to allow drivers/soc/fsl/qbman/qman_portal.c to do + * fsl_pamu_configure_l1_stash(). + */ if (type != IOMMU_DOMAIN_UNMANAGED) return NULL; @@ -283,16 +290,34 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain, return ret; } -static void fsl_pamu_set_platform_dma(struct device *dev) +/* + * FIXME: fsl/pamu is completely broken in terms of how it works with the iommu + * API. Immediately after probe the HW is left in an IDENTITY translation, and + * the driver provides a non-working UNMANAGED domain that it can switch over + * to. However, it cannot switch back to an IDENTITY translation; instead it + * switches to what looks like BLOCKING. + */ +static int fsl_pamu_platform_attach(struct iommu_domain *platform_domain, + struct device *dev) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); + struct fsl_dma_domain *dma_domain; const u32 *prop; int len; struct pci_dev *pdev = NULL; struct pci_controller *pci_ctl; /* + * Hack to keep things working as they always have: only leaving an + * UNMANAGED domain makes it BLOCKING. + */ + if (domain == platform_domain || !domain || + domain->type != IOMMU_DOMAIN_UNMANAGED) + return 0; + + dma_domain = to_fsl_dma_domain(domain); + + /* * Use LIODN of the PCI controller while detaching a * PCI device.
*/ @@ -312,8 +337,18 @@ static void fsl_pamu_set_platform_dma(struct device *dev) detach_device(dev, dma_domain); else pr_debug("missing fsl,liodn property at %pOF\n", dev->of_node); + return 0; } +static struct iommu_domain_ops fsl_pamu_platform_ops = { + .attach_dev = fsl_pamu_platform_attach, +}; + +static struct iommu_domain fsl_pamu_platform_domain = { + .type = IOMMU_DOMAIN_PLATFORM, + .ops = &fsl_pamu_platform_ops, +}; + /* Set the domain stash attribute */ int fsl_pamu_configure_l1_stash(struct iommu_domain *domain, u32 cpu) { @@ -395,11 +430,11 @@ static struct iommu_device *fsl_pamu_probe_device(struct device *dev) } static const struct iommu_ops fsl_pamu_ops = { + .default_domain = &fsl_pamu_platform_domain, .capable = fsl_pamu_capable, .domain_alloc = fsl_pamu_domain_alloc, .probe_device = fsl_pamu_probe_device, .device_group = fsl_pamu_device_group, - .set_platform_dma_ops = fsl_pamu_set_platform_dma, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = fsl_pamu_attach_device, .iova_to_phys = fsl_pamu_iova_to_phys, diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c index 1f925285104e..dee61e513be6 100644 --- a/drivers/iommu/intel/debugfs.c +++ b/drivers/iommu/intel/debugfs.c @@ -111,6 +111,8 @@ static const struct iommu_regset iommu_regs_64[] = { IOMMU_REGSET_ENTRY(VCRSP), }; +static struct dentry *intel_iommu_debug; + static int iommu_regset_show(struct seq_file *m, void *unused) { struct dmar_drhd_unit *drhd; @@ -311,9 +313,14 @@ static inline unsigned long level_to_directory_size(int level) static inline void dump_page_info(struct seq_file *m, unsigned long iova, u64 *path) { - seq_printf(m, "0x%013lx |\t0x%016llx\t0x%016llx\t0x%016llx\t0x%016llx\t0x%016llx\n", - iova >> VTD_PAGE_SHIFT, path[5], path[4], - path[3], path[2], path[1]); + seq_printf(m, "0x%013lx |\t0x%016llx\t0x%016llx\t0x%016llx", + iova >> VTD_PAGE_SHIFT, path[5], path[4], path[3]); + if (path[2]) { + seq_printf(m, "\t0x%016llx", path[2]); + if (path[1]) + seq_printf(m, "\t0x%016llx", path[1]); + } + seq_putc(m, '\n'); } static void pgtable_walk_level(struct seq_file *m, struct dma_pte *pde, @@ -340,58 +347,140 @@ static void pgtable_walk_level(struct seq_file *m, struct dma_pte *pde, } } -static int __show_device_domain_translation(struct device *dev, void *data) +static int domain_translation_struct_show(struct seq_file *m, + struct device_domain_info *info, + ioasid_t pasid) { - struct dmar_domain *domain; - struct seq_file *m = data; - u64 path[6] = { 0 }; - - domain = to_dmar_domain(iommu_get_domain_for_dev(dev)); - if (!domain) - return 0; + bool scalable, found = false; + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + u16 devfn, bus, seg; - seq_printf(m, "Device %s @0x%llx\n", dev_name(dev), - (u64)virt_to_phys(domain->pgd)); - seq_puts(m, "IOVA_PFN\t\tPML5E\t\t\tPML4E\t\t\tPDPE\t\t\tPDE\t\t\tPTE\n"); + bus = info->bus; + devfn = info->devfn; + seg = info->segment; - pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path); - seq_putc(m, '\n'); + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + struct context_entry *context; + u64 pgd, path[6] = { 0 }; + u32 sts, agaw; - /* Don't iterate */ - return 1; -} + if (seg != iommu->segment) + continue; -static int show_device_domain_translation(struct device *dev, void *data) -{ - struct iommu_group *group; + sts = dmar_readl(iommu->reg + DMAR_GSTS_REG); + if (!(sts & DMA_GSTS_TES)) { + seq_printf(m, "DMA Remapping is not enabled on %s\n", + iommu->name); + continue; + } + if 
(dmar_readq(iommu->reg + DMAR_RTADDR_REG) & DMA_RTADDR_SMT) + scalable = true; + else + scalable = false; - group = iommu_group_get(dev); - if (group) { /* - * The group->mutex is held across the callback, which will - * block calls to iommu_attach/detach_group/device. Hence, + * The iommu->lock is held across the callback, which will + * block calls to domain_attach/domain_detach. Hence, * the domain of the device will not change during traversal. * - * All devices in an iommu group share a single domain, hence - * we only dump the domain of the first device. Even though, - * this code still possibly races with the iommu_unmap() + * Traversing page table possibly races with the iommu_unmap() * interface. This could be solved by RCU-freeing the page * table pages in the iommu_unmap() path. */ - iommu_group_for_each_dev(group, data, - __show_device_domain_translation); - iommu_group_put(group); + spin_lock(&iommu->lock); + + context = iommu_context_addr(iommu, bus, devfn, 0); + if (!context || !context_present(context)) + goto iommu_unlock; + + if (scalable) { /* scalable mode */ + struct pasid_entry *pasid_tbl, *pasid_tbl_entry; + struct pasid_dir_entry *dir_tbl, *dir_entry; + u16 dir_idx, tbl_idx, pgtt; + u64 pasid_dir_ptr; + + pasid_dir_ptr = context->lo & VTD_PAGE_MASK; + + /* Dump specified device domain mappings with PASID. */ + dir_idx = pasid >> PASID_PDE_SHIFT; + tbl_idx = pasid & PASID_PTE_MASK; + + dir_tbl = phys_to_virt(pasid_dir_ptr); + dir_entry = &dir_tbl[dir_idx]; + + pasid_tbl = get_pasid_table_from_pde(dir_entry); + if (!pasid_tbl) + goto iommu_unlock; + + pasid_tbl_entry = &pasid_tbl[tbl_idx]; + if (!pasid_pte_is_present(pasid_tbl_entry)) + goto iommu_unlock; + + /* + * According to PASID Granular Translation Type(PGTT), + * get the page table pointer. 
+ */ + pgtt = (u16)(pasid_tbl_entry->val[0] & GENMASK_ULL(8, 6)) >> 6; + agaw = (u8)(pasid_tbl_entry->val[0] & GENMASK_ULL(4, 2)) >> 2; + + switch (pgtt) { + case PASID_ENTRY_PGTT_FL_ONLY: + pgd = pasid_tbl_entry->val[2]; + break; + case PASID_ENTRY_PGTT_SL_ONLY: + case PASID_ENTRY_PGTT_NESTED: + pgd = pasid_tbl_entry->val[0]; + break; + default: + goto iommu_unlock; + } + pgd &= VTD_PAGE_MASK; + } else { /* legacy mode */ + pgd = context->lo & VTD_PAGE_MASK; + agaw = context->hi & 7; + } + + seq_printf(m, "Device %04x:%02x:%02x.%x ", + iommu->segment, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + + if (scalable) + seq_printf(m, "with pasid %x @0x%llx\n", pasid, pgd); + else + seq_printf(m, "@0x%llx\n", pgd); + + seq_printf(m, "%-17s\t%-18s\t%-18s\t%-18s\t%-18s\t%-s\n", + "IOVA_PFN", "PML5E", "PML4E", "PDPE", "PDE", "PTE"); + pgtable_walk_level(m, phys_to_virt(pgd), agaw + 2, 0, path); + + found = true; +iommu_unlock: + spin_unlock(&iommu->lock); + if (found) + break; } + rcu_read_unlock(); return 0; } -static int domain_translation_struct_show(struct seq_file *m, void *unused) +static int dev_domain_translation_struct_show(struct seq_file *m, void *unused) +{ + struct device_domain_info *info = (struct device_domain_info *)m->private; + + return domain_translation_struct_show(m, info, IOMMU_NO_PASID); +} +DEFINE_SHOW_ATTRIBUTE(dev_domain_translation_struct); + +static int pasid_domain_translation_struct_show(struct seq_file *m, void *unused) { - return bus_for_each_dev(&pci_bus_type, NULL, m, - show_device_domain_translation); + struct dev_pasid_info *dev_pasid = (struct dev_pasid_info *)m->private; + struct device_domain_info *info = dev_iommu_priv_get(dev_pasid->dev); + + return domain_translation_struct_show(m, info, dev_pasid->pasid); } -DEFINE_SHOW_ATTRIBUTE(domain_translation_struct); +DEFINE_SHOW_ATTRIBUTE(pasid_domain_translation_struct); static void invalidation_queue_entry_show(struct seq_file *m, struct intel_iommu *iommu) @@ -666,16 +755,12 @@ static const struct file_operations dmar_perf_latency_fops = { void __init intel_iommu_debugfs_init(void) { - struct dentry *intel_iommu_debug = debugfs_create_dir("intel", - iommu_debugfs_dir); + intel_iommu_debug = debugfs_create_dir("intel", iommu_debugfs_dir); debugfs_create_file("iommu_regset", 0444, intel_iommu_debug, NULL, &iommu_regset_fops); debugfs_create_file("dmar_translation_struct", 0444, intel_iommu_debug, NULL, &dmar_translation_struct_fops); - debugfs_create_file("domain_translation_struct", 0444, - intel_iommu_debug, NULL, - &domain_translation_struct_fops); debugfs_create_file("invalidation_queue", 0444, intel_iommu_debug, NULL, &invalidation_queue_fops); #ifdef CONFIG_IRQ_REMAP @@ -685,3 +770,51 @@ void __init intel_iommu_debugfs_init(void) debugfs_create_file("dmar_perf_latency", 0644, intel_iommu_debug, NULL, &dmar_perf_latency_fops); } + +/* + * Create a debugfs directory for each device, and then create a + * debugfs file in this directory for users to dump the page table + * of the default domain. e.g. + * /sys/kernel/debug/iommu/intel/0000:00:01.0/domain_translation_struct + */ +void intel_iommu_debugfs_create_dev(struct device_domain_info *info) +{ + info->debugfs_dentry = debugfs_create_dir(dev_name(info->dev), intel_iommu_debug); + + debugfs_create_file("domain_translation_struct", 0444, info->debugfs_dentry, + info, &dev_domain_translation_struct_fops); +} + +/* Remove the device debugfs directory. 
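The debugfs side of this change is a straightforward dir-per-device layout: debugfs_create_dir() under the driver root, one file whose private data carries the per-device info, and debugfs_remove_recursive() on teardown. A self-contained sketch of the same plumbing, with hypothetical "drv" names:

    #include <linux/debugfs.h>
    #include <linux/device.h>
    #include <linux/seq_file.h>

    static struct dentry *drv_debug_root;    /* created once at module init */

    static int drv_translation_show(struct seq_file *m, void *unused)
    {
            /* m->private is the data pointer given to debugfs_create_file() */
            seq_printf(m, "per-device info at %p\n", m->private);
            return 0;
    }
    DEFINE_SHOW_ATTRIBUTE(drv_translation);

    static struct dentry *drv_debugfs_add_dev(struct device *dev, void *info)
    {
            struct dentry *dir = debugfs_create_dir(dev_name(dev), drv_debug_root);

            debugfs_create_file("domain_translation_struct", 0444, dir, info,
                                &drv_translation_fops);
            return dir;    /* pair with debugfs_remove_recursive(dir) */
    }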
*/ +void intel_iommu_debugfs_remove_dev(struct device_domain_info *info) +{ + debugfs_remove_recursive(info->debugfs_dentry); +} + +/* + * Create a debugfs directory per pair of {device, pasid}, then create the + * corresponding debugfs file in this directory for users to dump its page + * table. e.g. + * /sys/kernel/debug/iommu/intel/0000:00:01.0/1/domain_translation_struct + * + * The debugfs only dumps the page tables whose mappings are created and + * destroyed by the iommu_map/unmap() interfaces. Check the mapping type + * of the domain before creating debugfs directory. + */ +void intel_iommu_debugfs_create_dev_pasid(struct dev_pasid_info *dev_pasid) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev_pasid->dev); + char dir_name[10]; + + sprintf(dir_name, "%x", dev_pasid->pasid); + dev_pasid->debugfs_dentry = debugfs_create_dir(dir_name, info->debugfs_dentry); + + debugfs_create_file("domain_translation_struct", 0444, dev_pasid->debugfs_dentry, + dev_pasid, &pasid_domain_translation_struct_fops); +} + +/* Remove the device pasid debugfs directory. */ +void intel_iommu_debugfs_remove_dev_pasid(struct dev_pasid_info *dev_pasid) +{ + debugfs_remove_recursive(dev_pasid->debugfs_dentry); +} diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index d1037280abf7..3531b956556c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4016,9 +4016,9 @@ static int blocking_domain_attach_dev(struct iommu_domain *domain, } static struct iommu_domain blocking_domain = { + .type = IOMMU_DOMAIN_BLOCKED, .ops = &(const struct iommu_domain_ops) { .attach_dev = blocking_domain_attach_dev, - .free = intel_iommu_domain_free } }; @@ -4028,8 +4028,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) struct iommu_domain *domain; switch (type) { - case IOMMU_DOMAIN_BLOCKED: - return &blocking_domain; case IOMMU_DOMAIN_DMA: case IOMMU_DOMAIN_UNMANAGED: dmar_domain = alloc_domain(type); @@ -4111,7 +4109,7 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags, static void intel_iommu_domain_free(struct iommu_domain *domain) { - if (domain != &si_domain->domain && domain != &blocking_domain) + if (domain != &si_domain->domain) domain_exit(to_dmar_domain(domain)); } @@ -4465,6 +4463,8 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) } } + intel_iommu_debugfs_create_dev(info); + return &iommu->iommu; } @@ -4474,6 +4474,7 @@ static void intel_iommu_release_device(struct device *dev) dmar_remove_one_dev_info(dev); intel_pasid_free_table(dev); + intel_iommu_debugfs_remove_dev(info); dev_iommu_priv_set(dev, NULL); kfree(info); set_dma_ops(dev, NULL); @@ -4718,8 +4719,8 @@ static bool risky_device(struct pci_dev *pdev) return false; } -static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, - unsigned long iova, size_t size) +static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); unsigned long pages = aligned_nrpages(iova, size); @@ -4729,6 +4730,7 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, xa_for_each(&dmar_domain->iommu_array, i, info) __mapping_notify_one(info->iommu, dmar_domain, pfn, pages); + return 0; } static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) @@ -4766,6 +4768,7 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) spin_unlock_irqrestore(&dmar_domain->lock, flags); domain_detach_iommu(dmar_domain, 
iommu); + intel_iommu_debugfs_remove_dev_pasid(dev_pasid); kfree(dev_pasid); out_tear_down: intel_pasid_tear_down_entry(iommu, dev, pasid, false); @@ -4821,6 +4824,9 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids); spin_unlock_irqrestore(&dmar_domain->lock, flags); + if (domain->type & __IOMMU_DOMAIN_PAGING) + intel_iommu_debugfs_create_dev_pasid(dev_pasid); + return 0; out_detach_iommu: domain_detach_iommu(dmar_domain, iommu); @@ -4925,6 +4931,7 @@ const struct iommu_dirty_ops intel_dirty_ops = { }; const struct iommu_ops intel_iommu_ops = { + .blocked_domain = &blocking_domain, .capable = intel_iommu_capable, .hw_info = intel_iommu_hw_info, .domain_alloc = intel_iommu_domain_alloc, diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index d796d0d9b114..65d37a138c75 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -749,12 +749,18 @@ struct device_domain_info { struct intel_iommu *iommu; /* IOMMU used by this device */ struct dmar_domain *domain; /* pointer to domain */ struct pasid_table *pasid_table; /* pasid table */ +#ifdef CONFIG_INTEL_IOMMU_DEBUGFS + struct dentry *debugfs_dentry; /* pointer to device directory dentry */ +#endif }; struct dev_pasid_info { struct list_head link_domain; /* link to domain siblings */ struct device *dev; ioasid_t pasid; +#ifdef CONFIG_INTEL_IOMMU_DEBUGFS + struct dentry *debugfs_dentry; /* pointer to pasid directory dentry */ +#endif }; static inline void __iommu_flush_cache( @@ -935,8 +941,16 @@ static inline void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid #ifdef CONFIG_INTEL_IOMMU_DEBUGFS void intel_iommu_debugfs_init(void); +void intel_iommu_debugfs_create_dev(struct device_domain_info *info); +void intel_iommu_debugfs_remove_dev(struct device_domain_info *info); +void intel_iommu_debugfs_create_dev_pasid(struct dev_pasid_info *dev_pasid); +void intel_iommu_debugfs_remove_dev_pasid(struct dev_pasid_info *dev_pasid); #else static inline void intel_iommu_debugfs_init(void) {} +static inline void intel_iommu_debugfs_create_dev(struct device_domain_info *info) {} +static inline void intel_iommu_debugfs_remove_dev(struct device_domain_info *info) {} +static inline void intel_iommu_debugfs_create_dev_pasid(struct dev_pasid_info *dev_pasid) {} +static inline void intel_iommu_debugfs_remove_dev_pasid(struct dev_pasid_info *dev_pasid) {} #endif /* CONFIG_INTEL_IOMMU_DEBUGFS */ extern const struct attribute_group *intel_iommu_groups[]; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index c146378c7d03..f17a1113f3d6 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -37,7 +37,6 @@ #include "iommu-priv.h" #include "iommu-sva.h" -#include "iommu-priv.h" static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); @@ -96,8 +95,8 @@ static const char * const iommu_group_resv_type_string[] = { static int iommu_bus_notifier(struct notifier_block *nb, unsigned long action, void *data); static void iommu_release_device(struct device *dev); -static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, - unsigned type); +static struct iommu_domain * +__iommu_group_domain_alloc(struct iommu_group *group, unsigned int type); static int __iommu_attach_device(struct iommu_domain *domain, struct device *dev); static int __iommu_attach_group(struct iommu_domain *domain, @@ -184,6 +183,8 @@ static const char *iommu_domain_type_str(unsigned int t) case IOMMU_DOMAIN_DMA: case 
IOMMU_DOMAIN_DMA_FQ: return "Translated"; + case IOMMU_DOMAIN_PLATFORM: + return "Platform"; default: return "Unknown"; } @@ -290,6 +291,10 @@ void iommu_device_unregister(struct iommu_device *iommu) spin_lock(&iommu_device_lock); list_del(&iommu->list); spin_unlock(&iommu_device_lock); + + /* Pairs with the alloc in generic_single_device_group() */ + iommu_group_put(iommu->singleton_group); + iommu->singleton_group = NULL; } EXPORT_SYMBOL_GPL(iommu_device_unregister); @@ -404,6 +409,7 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops) ret = PTR_ERR(iommu_dev); goto err_module_put; } + dev->iommu->iommu_dev = iommu_dev; ret = iommu_device_link(iommu_dev, dev); if (ret) @@ -418,7 +424,6 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops) } dev->iommu_group = group; - dev->iommu->iommu_dev = iommu_dev; dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev); if (ops->is_attach_deferred) dev->iommu->attach_deferred = ops->is_attach_deferred(dev); @@ -432,6 +437,7 @@ err_release: err_module_put: module_put(ops->owner); err_free: + dev->iommu->iommu_dev = NULL; dev_iommu_free(dev); return ret; } @@ -1637,6 +1643,27 @@ struct iommu_group *generic_device_group(struct device *dev) EXPORT_SYMBOL_GPL(generic_device_group); /* + * Generic device_group call-back function. It just allocates one + * iommu-group per iommu driver instance shared by every device + * probed by that iommu driver. + */ +struct iommu_group *generic_single_device_group(struct device *dev) +{ + struct iommu_device *iommu = dev->iommu->iommu_dev; + + if (!iommu->singleton_group) { + struct iommu_group *group; + + group = iommu_group_alloc(); + if (IS_ERR(group)) + return group; + iommu->singleton_group = group; + } + return iommu_group_ref_get(iommu->singleton_group); +} +EXPORT_SYMBOL_GPL(generic_single_device_group); + +/* * Use standard PCI bus topology, isolation features, and DMA alias quirks * to find or create an IOMMU group for a device. */ @@ -1717,26 +1744,29 @@ struct iommu_group *fsl_mc_device_group(struct device *dev) } EXPORT_SYMBOL_GPL(fsl_mc_device_group); -static int iommu_get_def_domain_type(struct device *dev) -{ - const struct iommu_ops *ops = dev_iommu_ops(dev); - - if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted) - return IOMMU_DOMAIN_DMA; - - if (ops->def_domain_type) - return ops->def_domain_type(dev); - - return 0; -} - static struct iommu_domain * -__iommu_group_alloc_default_domain(const struct bus_type *bus, - struct iommu_group *group, int req_type) +__iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) { if (group->default_domain && group->default_domain->type == req_type) return group->default_domain; - return __iommu_domain_alloc(bus, req_type); + return __iommu_group_domain_alloc(group, req_type); +} + +/* + * Returns the iommu_ops for the devices in an iommu group. + * + * It is assumed that all devices in an iommu group are managed by a single + * IOMMU unit. Therefore, this returns the dev_iommu_ops of the first device + * in the group. 
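generic_single_device_group() generalises a pattern several drivers (ipmmu, later in this series) used to open-code: allocate one group the first time any device behind the IOMMU instance probes, then hand out an extra reference to each later caller, with iommu_device_unregister() dropping the cached reference. A condensed sketch using a hypothetical per-instance struct (the real helper caches the group in struct iommu_device, and the core's group->mutex serialises callers):

    #include <linux/err.h>
    #include <linux/iommu.h>

    struct my_iommu {
            struct iommu_device iommu;
            struct iommu_group *singleton_group;    /* put at unregister time */
    };

    static struct iommu_group *my_single_device_group(struct my_iommu *mi)
    {
            if (!mi->singleton_group) {
                    struct iommu_group *group = iommu_group_alloc();

                    if (IS_ERR(group))
                            return group;
                    mi->singleton_group = group;    /* first caller allocates */
            }
            /* every caller, including the first, gets its own reference */
            return iommu_group_ref_get(mi->singleton_group);
    }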
+ */ +static const struct iommu_ops *group_iommu_ops(struct iommu_group *group) +{ + struct group_device *device = + list_first_entry(&group->devices, struct group_device, list); + + lockdep_assert_held(&group->mutex); + + return dev_iommu_ops(device->dev); } /* @@ -1746,25 +1776,34 @@ __iommu_group_alloc_default_domain(const struct bus_type *bus, static struct iommu_domain * iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) { - const struct bus_type *bus = - list_first_entry(&group->devices, struct group_device, list) - ->dev->bus; + const struct iommu_ops *ops = group_iommu_ops(group); struct iommu_domain *dom; lockdep_assert_held(&group->mutex); + /* + * Allow legacy drivers to specify the domain that will be the default + * domain. This should always be either an IDENTITY/BLOCKED/PLATFORM + * domain. Do not use in new drivers. + */ + if (ops->default_domain) { + if (req_type) + return NULL; + return ops->default_domain; + } + if (req_type) - return __iommu_group_alloc_default_domain(bus, group, req_type); + return __iommu_group_alloc_default_domain(group, req_type); /* The driver gave no guidance on what type to use, try the default */ - dom = __iommu_group_alloc_default_domain(bus, group, iommu_def_domain_type); + dom = __iommu_group_alloc_default_domain(group, iommu_def_domain_type); if (dom) return dom; /* Otherwise IDENTITY and DMA_FQ defaults will try DMA */ if (iommu_def_domain_type == IOMMU_DOMAIN_DMA) return NULL; - dom = __iommu_group_alloc_default_domain(bus, group, IOMMU_DOMAIN_DMA); + dom = __iommu_group_alloc_default_domain(group, IOMMU_DOMAIN_DMA); if (!dom) return NULL; @@ -1808,40 +1847,109 @@ static int iommu_bus_notifier(struct notifier_block *nb, return 0; } -/* A target_type of 0 will select the best domain type and cannot fail */ +/* + * Combine the driver's chosen def_domain_type across all the devices in a + * group. Drivers must give a consistent result. + */ +static int iommu_get_def_domain_type(struct iommu_group *group, + struct device *dev, int cur_type) +{ + const struct iommu_ops *ops = group_iommu_ops(group); + int type; + + if (!ops->def_domain_type) + return cur_type; + + type = ops->def_domain_type(dev); + if (!type || cur_type == type) + return cur_type; + if (!cur_type) + return type; + + dev_err_ratelimited( + dev, + "IOMMU driver error, requesting conflicting def_domain_type, %s and %s, for devices in group %u.\n", + iommu_domain_type_str(cur_type), iommu_domain_type_str(type), + group->id); + + /* + * Try to recover: drivers are allowed to force IDENTITY or DMA, and + * IDENTITY takes precedence. + */ + if (type == IOMMU_DOMAIN_IDENTITY) + return type; + return cur_type; +} + +/* + * A target_type of 0 will select the best domain type. 0 can be returned in + * this case, meaning the global default should be used. + */ static int iommu_get_default_domain_type(struct iommu_group *group, int target_type) { - int best_type = target_type; + struct device *untrusted = NULL; struct group_device *gdev; - struct device *last_dev; + int driver_type = 0; lockdep_assert_held(&group->mutex); + /* + * ARM32 drivers supporting CONFIG_ARM_DMA_USE_IOMMU can declare an + * identity_domain and it will automatically become their default + * domain. Later on, ARM_DMA_USE_IOMMU will install its UNMANAGED domain. + * Override the selection to IDENTITY.
+ */ + if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) { + static_assert(!(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) && + IS_ENABLED(CONFIG_IOMMU_DMA))); + driver_type = IOMMU_DOMAIN_IDENTITY; + } + for_each_group_device(group, gdev) { - unsigned int type = iommu_get_def_domain_type(gdev->dev); - - if (best_type && type && best_type != type) { - if (target_type) { - dev_err_ratelimited( - gdev->dev, - "Device cannot be in %s domain\n", - iommu_domain_type_str(target_type)); + driver_type = iommu_get_def_domain_type(group, gdev->dev, + driver_type); + + if (dev_is_pci(gdev->dev) && to_pci_dev(gdev->dev)->untrusted) { + /* + * No ARM32 using systems will set untrusted, it cannot + * work. + */ + if (WARN_ON(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU))) return -1; - } + untrusted = gdev->dev; + } + } - dev_warn( - gdev->dev, - "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n", - iommu_domain_type_str(type), dev_name(last_dev), - iommu_domain_type_str(best_type)); - return 0; + /* + * If the common dma ops are not selected in kconfig then we cannot use + * IOMMU_DOMAIN_DMA at all. Force IDENTITY if nothing else has been + * selected. + */ + if (!IS_ENABLED(CONFIG_IOMMU_DMA)) { + if (WARN_ON(driver_type == IOMMU_DOMAIN_DMA)) + return -1; + if (!driver_type) + driver_type = IOMMU_DOMAIN_IDENTITY; + } + + if (untrusted) { + if (driver_type && driver_type != IOMMU_DOMAIN_DMA) { + dev_err_ratelimited( + untrusted, + "Device is not trusted, but driver is overriding group %u to %s, refusing to probe.\n", + group->id, iommu_domain_type_str(driver_type)); + return -1; } - if (!best_type) - best_type = type; - last_dev = gdev->dev; + driver_type = IOMMU_DOMAIN_DMA; } - return best_type; + + if (target_type) { + if (driver_type && target_type != driver_type) + return -1; + return target_type; + } + return driver_type; } static void iommu_group_do_probe_finalize(struct device *dev) @@ -1970,16 +2078,24 @@ void iommu_set_fault_handler(struct iommu_domain *domain, } EXPORT_SYMBOL_GPL(iommu_set_fault_handler); -static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, - unsigned type) +static struct iommu_domain *__iommu_domain_alloc(const struct iommu_ops *ops, + struct device *dev, + unsigned int type) { struct iommu_domain *domain; unsigned int alloc_type = type & IOMMU_DOMAIN_ALLOC_FLAGS; - if (bus == NULL || bus->iommu_ops == NULL) + if (alloc_type == IOMMU_DOMAIN_IDENTITY && ops->identity_domain) + return ops->identity_domain; + else if (alloc_type == IOMMU_DOMAIN_BLOCKED && ops->blocked_domain) + return ops->blocked_domain; + else if (type & __IOMMU_DOMAIN_PAGING && ops->domain_alloc_paging) + domain = ops->domain_alloc_paging(dev); + else if (ops->domain_alloc) + domain = ops->domain_alloc(alloc_type); + else return NULL; - domain = bus->iommu_ops->domain_alloc(alloc_type); if (!domain) return NULL; @@ -1989,10 +2105,10 @@ static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, * may override this later */ if (!domain->pgsize_bitmap) - domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap; + domain->pgsize_bitmap = ops->pgsize_bitmap; if (!domain->ops) - domain->ops = bus->iommu_ops->default_domain_ops; + domain->ops = ops->default_domain_ops; if (iommu_is_dma_domain(domain) && iommu_get_dma_cookie(domain)) { iommu_domain_free(domain); @@ -2001,9 +2117,22 @@ static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, return domain; } +static struct iommu_domain * +__iommu_group_domain_alloc(struct iommu_group 
*group, unsigned int type) +{ + struct device *dev = + list_first_entry(&group->devices, struct group_device, list) + ->dev; + + return __iommu_domain_alloc(group_iommu_ops(group), dev, type); +} + struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus) { - return __iommu_domain_alloc(bus, IOMMU_DOMAIN_UNMANAGED); + if (bus == NULL || bus->iommu_ops == NULL) + return NULL; + return __iommu_domain_alloc(bus->iommu_ops, NULL, + IOMMU_DOMAIN_UNMANAGED); } EXPORT_SYMBOL_GPL(iommu_domain_alloc); @@ -2012,7 +2141,8 @@ void iommu_domain_free(struct iommu_domain *domain) if (domain->type == IOMMU_DOMAIN_SVA) mmdrop(domain->mm); iommu_put_dma_cookie(domain); - domain->ops->free(domain); + if (domain->ops->free) + domain->ops->free(domain); } EXPORT_SYMBOL_GPL(iommu_domain_free); @@ -2062,10 +2192,10 @@ static int __iommu_attach_device(struct iommu_domain *domain, */ int iommu_attach_device(struct iommu_domain *domain, struct device *dev) { - struct iommu_group *group; + /* Caller must be a probed driver on dev */ + struct iommu_group *group = dev->iommu_group; int ret; - group = iommu_group_get(dev); if (!group) return -ENODEV; @@ -2082,8 +2212,6 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev) out_unlock: mutex_unlock(&group->mutex); - iommu_group_put(group); - return ret; } EXPORT_SYMBOL_GPL(iommu_attach_device); @@ -2098,9 +2226,9 @@ int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) void iommu_detach_device(struct iommu_domain *domain, struct device *dev) { - struct iommu_group *group; + /* Caller must be a probed driver on dev */ + struct iommu_group *group = dev->iommu_group; - group = iommu_group_get(dev); if (!group) return; @@ -2112,24 +2240,18 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev) out_unlock: mutex_unlock(&group->mutex); - iommu_group_put(group); } EXPORT_SYMBOL_GPL(iommu_detach_device); struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) { - struct iommu_domain *domain; - struct iommu_group *group; + /* Caller must be a probed driver on dev */ + struct iommu_group *group = dev->iommu_group; - group = iommu_group_get(dev); if (!group) return NULL; - domain = group->domain; - - iommu_group_put(group); - - return domain; + return group->domain; } EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev); @@ -2275,21 +2397,8 @@ static int __iommu_group_set_domain_internal(struct iommu_group *group, if (group->domain == new_domain) return 0; - /* - * New drivers should support default domains, so set_platform_dma() - * op will never be called. Otherwise the NULL domain represents some - * platform specific behavior. - */ - if (!new_domain) { - for_each_group_device(group, gdev) { - const struct iommu_ops *ops = dev_iommu_ops(gdev->dev); - - if (!WARN_ON(!ops->set_platform_dma_ops)) - ops->set_platform_dma_ops(gdev->dev); - } - group->domain = NULL; - return 0; - } + if (WARN_ON(!new_domain)) + return -EINVAL; /* * Changing the domain is done by calling attach_dev() on the new @@ -2325,19 +2434,15 @@ err_revert: */ last_gdev = gdev; for_each_group_device(group, gdev) { - const struct iommu_ops *ops = dev_iommu_ops(gdev->dev); - /* - * If set_platform_dma_ops is not present a NULL domain can - * happen only for first probe, in which case we leave - * group->domain as NULL and let release clean everything up. + * A NULL domain can happen only for first probe, in which case + * we leave group->domain as NULL and let release clean + * everything up. 
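The allocation order in __iommu_domain_alloc() is worth spelling out, since it is what lets converted drivers drop the type checks from their domain_alloc() hooks: statically declared special domains win, then the paging-only hook, then the legacy hook. A condensed restatement of that precedence (flag masking and the pgsize_bitmap/ops fix-ups that follow it are omitted):

    #include <linux/iommu.h>

    static struct iommu_domain *alloc_domain_sketch(const struct iommu_ops *ops,
                                                    struct device *dev,
                                                    unsigned int type)
    {
            if (type == IOMMU_DOMAIN_IDENTITY && ops->identity_domain)
                    return ops->identity_domain;           /* static, never freed */
            if (type == IOMMU_DOMAIN_BLOCKED && ops->blocked_domain)
                    return ops->blocked_domain;            /* static, never freed */
            if ((type & __IOMMU_DOMAIN_PAGING) && ops->domain_alloc_paging)
                    return ops->domain_alloc_paging(dev);  /* new-style hook */
            if (ops->domain_alloc)
                    return ops->domain_alloc(type);        /* legacy fallback */
            return NULL;
    }

This is also why iommu_domain_free() above now tolerates a missing ->free op: the static identity/blocked domains have no release function.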
*/ if (group->domain) WARN_ON(__iommu_device_set_domain( group, gdev->dev, group->domain, IOMMU_SET_DOMAIN_MUST_SUCCEED)); - else if (ops->set_platform_dma_ops) - ops->set_platform_dma_ops(gdev->dev); if (gdev == last_gdev) break; } @@ -2418,30 +2523,6 @@ out_set_count: return pgsize; } -static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, - gfp_t gfp, size_t *mapped) -{ - const struct iommu_domain_ops *ops = domain->ops; - size_t pgsize, count; - int ret; - - pgsize = iommu_pgsize(domain, iova, paddr, size, &count); - - pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", - iova, &paddr, pgsize, count); - - if (ops->map_pages) { - ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, - gfp, mapped); - } else { - ret = ops->map(domain, iova, paddr, pgsize, prot, gfp); - *mapped = ret ? 0 : pgsize; - } - - return ret; -} - static int __iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { @@ -2452,13 +2533,12 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t orig_paddr = paddr; int ret = 0; - if (unlikely(!(ops->map || ops->map_pages) || - domain->pgsize_bitmap == 0UL)) - return -ENODEV; - if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) return -EINVAL; + if (WARN_ON(!ops->map_pages || domain->pgsize_bitmap == 0UL)) + return -ENODEV; + /* find out the minimum page size supported */ min_pagesz = 1 << __ffs(domain->pgsize_bitmap); @@ -2476,10 +2556,14 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); while (size) { - size_t mapped = 0; + size_t pgsize, count, mapped = 0; + + pgsize = iommu_pgsize(domain, iova, paddr, size, &count); - ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp, - &mapped); + pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", + iova, &paddr, pgsize, count); + ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, + gfp, &mapped); /* * Some pages may have been mapped, even if an error occurred, * so we should account for those so they can be unmapped. @@ -2516,25 +2600,21 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova, return -EINVAL; ret = __iommu_map(domain, iova, paddr, size, prot, gfp); - if (ret == 0 && ops->iotlb_sync_map) - ops->iotlb_sync_map(domain, iova, size); + if (ret == 0 && ops->iotlb_sync_map) { + ret = ops->iotlb_sync_map(domain, iova, size); + if (ret) + goto out_err; + } return ret; -} -EXPORT_SYMBOL_GPL(iommu_map); -static size_t __iommu_unmap_pages(struct iommu_domain *domain, - unsigned long iova, size_t size, - struct iommu_iotlb_gather *iotlb_gather) -{ - const struct iommu_domain_ops *ops = domain->ops; - size_t pgsize, count; +out_err: + /* undo mappings already done */ + iommu_unmap(domain, iova, size); - pgsize = iommu_pgsize(domain, iova, iova, size, &count); - return ops->unmap_pages ? 
- ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) : - ops->unmap(domain, iova, pgsize, iotlb_gather); + return ret; } +EXPORT_SYMBOL_GPL(iommu_map); static size_t __iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size, @@ -2545,11 +2625,10 @@ static size_t __iommu_unmap(struct iommu_domain *domain, unsigned long orig_iova = iova; unsigned int min_pagesz; - if (unlikely(!(ops->unmap || ops->unmap_pages) || - domain->pgsize_bitmap == 0UL)) + if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) return 0; - if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) + if (WARN_ON(!ops->unmap_pages || domain->pgsize_bitmap == 0UL)) return 0; /* find out the minimum page size supported */ @@ -2573,9 +2652,10 @@ static size_t __iommu_unmap(struct iommu_domain *domain, * or we hit an area that isn't mapped. */ while (unmapped < size) { - unmapped_page = __iommu_unmap_pages(domain, iova, - size - unmapped, - iotlb_gather); + size_t pgsize, count; + + pgsize = iommu_pgsize(domain, iova, iova, size - unmapped, &count); + unmapped_page = ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather); if (!unmapped_page) break; @@ -2658,8 +2738,11 @@ next: sg = sg_next(sg); } - if (ops->iotlb_sync_map) - ops->iotlb_sync_map(domain, iova, mapped); + if (ops->iotlb_sync_map) { + ret = ops->iotlb_sync_map(domain, iova, mapped); + if (ret) + goto out_err; + } return mapped; out_err: @@ -2957,21 +3040,9 @@ static int iommu_setup_default_domain(struct iommu_group *group, if (req_type < 0) return -EINVAL; - /* - * There are still some drivers which don't support default domains, so - * we ignore the failure and leave group->default_domain NULL. - * - * We assume that the iommu driver starts up the device in - * 'set_platform_dma_ops' mode if it does not support default domains. - */ dom = iommu_group_alloc_default_domain(group, req_type); - if (!dom) { - /* Once in default_domain mode we never leave */ - if (group->default_domain) - return -ENODEV; - group->default_domain = NULL; - return 0; - } + if (!dom) + return -ENODEV; if (group->default_domain == dom) return 0; @@ -3114,24 +3185,6 @@ out_unlock: return ret ?: count; } -static bool iommu_is_default_domain(struct iommu_group *group) -{ - if (group->domain == group->default_domain) - return true; - - /* - * If the default domain was set to identity and it is still an identity - * domain then we consider this a pass. This happens because of - * amd_iommu_init_device() replacing the default idenytity domain with an - * identity domain that has a different configuration for AMDGPU. - */ - if (group->default_domain && - group->default_domain->type == IOMMU_DOMAIN_IDENTITY && - group->domain && group->domain->type == IOMMU_DOMAIN_IDENTITY) - return true; - return false; -} - /** * iommu_device_use_default_domain() - Device driver wants to handle device * DMA through the kernel DMA API. 
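With iotlb_sync_map() now returning int, iommu_map() has to unwind on failure so callers never see a half-synced range; the same pattern repeats in the scatterlist path below. A condensed restatement of the new control flow (a sketch assuming the internal helpers visible above, not a new API):

    static int map_and_sync_sketch(struct iommu_domain *domain,
                                   unsigned long iova, phys_addr_t paddr,
                                   size_t size, int prot, gfp_t gfp)
    {
            int ret = __iommu_map(domain, iova, paddr, size, prot, gfp);

            if (ret)
                    return ret;
            if (domain->ops->iotlb_sync_map) {
                    ret = domain->ops->iotlb_sync_map(domain, iova, size);
                    if (ret) {
                            /* undo the mappings already done on sync failure */
                            iommu_unmap(domain, iova, size);
                            return ret;
                    }
            }
            return 0;
    }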
@@ -3142,7 +3195,8 @@ static bool iommu_is_default_domain(struct iommu_group *group) */ int iommu_device_use_default_domain(struct device *dev) { - struct iommu_group *group = iommu_group_get(dev); + /* Caller is the driver core during the pre-probe path */ + struct iommu_group *group = dev->iommu_group; int ret = 0; if (!group) @@ -3150,7 +3204,7 @@ int iommu_device_use_default_domain(struct device *dev) mutex_lock(&group->mutex); if (group->owner_cnt) { - if (group->owner || !iommu_is_default_domain(group) || + if (group->domain != group->default_domain || group->owner || !xa_empty(&group->pasid_array)) { ret = -EBUSY; goto unlock_out; @@ -3161,8 +3215,6 @@ int iommu_device_use_default_domain(struct device *dev) unlock_out: mutex_unlock(&group->mutex); - iommu_group_put(group); - return ret; } @@ -3176,7 +3228,8 @@ unlock_out: */ void iommu_device_unuse_default_domain(struct device *dev) { - struct iommu_group *group = iommu_group_get(dev); + /* Caller is the driver core during the post-probe path */ + struct iommu_group *group = dev->iommu_group; if (!group) return; @@ -3186,26 +3239,22 @@ void iommu_device_unuse_default_domain(struct device *dev) group->owner_cnt--; mutex_unlock(&group->mutex); - iommu_group_put(group); } static int __iommu_group_alloc_blocking_domain(struct iommu_group *group) { - struct group_device *dev = - list_first_entry(&group->devices, struct group_device, list); - if (group->blocking_domain) return 0; group->blocking_domain = - __iommu_domain_alloc(dev->dev->bus, IOMMU_DOMAIN_BLOCKED); + __iommu_group_domain_alloc(group, IOMMU_DOMAIN_BLOCKED); if (!group->blocking_domain) { /* * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED * create an empty domain instead. */ - group->blocking_domain = __iommu_domain_alloc( - dev->dev->bus, IOMMU_DOMAIN_UNMANAGED); + group->blocking_domain = __iommu_group_domain_alloc( + group, IOMMU_DOMAIN_UNMANAGED); if (!group->blocking_domain) return -EINVAL; } @@ -3273,13 +3322,13 @@ EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner); */ int iommu_device_claim_dma_owner(struct device *dev, void *owner) { - struct iommu_group *group; + /* Caller must be a probed driver on dev */ + struct iommu_group *group = dev->iommu_group; int ret = 0; if (WARN_ON(!owner)) return -EINVAL; - group = iommu_group_get(dev); if (!group) return -ENODEV; @@ -3296,8 +3345,6 @@ int iommu_device_claim_dma_owner(struct device *dev, void *owner) ret = __iommu_take_dma_ownership(group, owner); unlock_out: mutex_unlock(&group->mutex); - iommu_group_put(group); - return ret; } EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner); @@ -3335,7 +3382,8 @@ EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner); */ void iommu_device_release_dma_owner(struct device *dev) { - struct iommu_group *group = iommu_group_get(dev); + /* Caller must be a probed driver on dev */ + struct iommu_group *group = dev->iommu_group; mutex_lock(&group->mutex); if (group->owner_cnt > 1) @@ -3343,7 +3391,6 @@ void iommu_device_release_dma_owner(struct device *dev) else __iommu_release_dma_ownership(group); mutex_unlock(&group->mutex); - iommu_group_put(group); } EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner); @@ -3404,14 +3451,14 @@ static void __iommu_remove_group_pasid(struct iommu_group *group, int iommu_attach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid) { - struct iommu_group *group; + /* Caller must be a probed driver on dev */ + struct iommu_group *group = dev->iommu_group; void *curr; int ret; if (!domain->ops->set_dev_pasid) return -EOPNOTSUPP; - 
group = iommu_group_get(dev); if (!group) return -ENODEV; @@ -3429,8 +3476,6 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, } out_unlock: mutex_unlock(&group->mutex); - iommu_group_put(group); - return ret; } EXPORT_SYMBOL_GPL(iommu_attach_device_pasid); @@ -3447,14 +3492,13 @@ EXPORT_SYMBOL_GPL(iommu_attach_device_pasid); void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid) { - struct iommu_group *group = iommu_group_get(dev); + /* Caller must be a probed driver on dev */ + struct iommu_group *group = dev->iommu_group; mutex_lock(&group->mutex); __iommu_remove_group_pasid(group, pasid); WARN_ON(xa_erase(&group->pasid_array, pasid) != domain); mutex_unlock(&group->mutex); - - iommu_group_put(group); } EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); @@ -3476,10 +3520,10 @@ struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, unsigned int type) { + /* Caller must be a probed driver on dev */ + struct iommu_group *group = dev->iommu_group; struct iommu_domain *domain; - struct iommu_group *group; - group = iommu_group_get(dev); if (!group) return NULL; @@ -3488,7 +3532,6 @@ struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev, if (type && domain && domain->type != type) domain = ERR_PTR(-EBUSY); xa_unlock(&group->pasid_array); - iommu_group_put(group); return domain; } diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index d43a87737c1e..5d93434003d8 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -123,10 +123,6 @@ struct selftest_obj { }; }; -static void mock_domain_blocking_free(struct iommu_domain *domain) -{ -} - static int mock_domain_nop_attach(struct iommu_domain *domain, struct device *dev) { @@ -139,7 +135,6 @@ static int mock_domain_nop_attach(struct iommu_domain *domain, } static const struct iommu_domain_ops mock_blocking_ops = { - .free = mock_domain_blocking_free, .attach_dev = mock_domain_nop_attach, }; @@ -258,15 +253,6 @@ __mock_domain_alloc_nested(struct mock_iommu_domain *mock_parent, return &mock_nested->domain; } -static struct iommu_domain *mock_domain_alloc(unsigned int iommu_domain_type) -{ - if (iommu_domain_type == IOMMU_DOMAIN_BLOCKED) - return &mock_blocking_domain; - if (iommu_domain_type == IOMMU_DOMAIN_UNMANAGED) - return mock_domain_alloc_paging(NULL); - return NULL; -} - static struct iommu_domain * mock_domain_alloc_user(struct device *dev, u32 flags, struct iommu_domain *parent, @@ -446,14 +432,6 @@ static bool mock_domain_capable(struct device *dev, enum iommu_cap cap) return false; } -static void mock_domain_set_plaform_dma_ops(struct device *dev) -{ - /* - * mock doesn't setup default domains because we can't hook into the - * normal probe path - */ -} - static struct iommu_device mock_iommu_device = { }; @@ -463,13 +441,18 @@ static struct iommu_device *mock_probe_device(struct device *dev) } static const struct iommu_ops mock_ops = { + /* + * IOMMU_DOMAIN_BLOCKED cannot be returned from def_domain_type() + * because it is zero. 
+ */ + .default_domain = &mock_blocking_domain, + .blocked_domain = &mock_blocking_domain, .owner = THIS_MODULE, .pgsize_bitmap = MOCK_IO_PAGE_SIZE, .hw_info = mock_domain_hw_info, - .domain_alloc = mock_domain_alloc, + .domain_alloc_paging = mock_domain_alloc_paging, .domain_alloc_user = mock_domain_alloc_user, .capable = mock_domain_capable, - .set_platform_dma_ops = mock_domain_set_plaform_dma_ops, .device_group = generic_device_group, .probe_device = mock_probe_device, .default_domain_ops = diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 10b964600948..d30e453d0fb4 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -11,6 +11,7 @@ #include <linux/smp.h> #include <linux/bitops.h> #include <linux/cpu.h> +#include <linux/workqueue.h> /* The anchor node sits above the top of the usable address space */ #define IOVA_ANCHOR ~0UL @@ -622,15 +623,21 @@ EXPORT_SYMBOL_GPL(reserve_iova); /* * As kmalloc's buffer size is fixed to power of 2, 127 is chosen to * assure size of 'iova_magazine' to be 1024 bytes, so that no memory - * will be wasted. + * will be wasted. Since only full magazines are inserted into the depot, + * we don't need to waste PFN capacity on a separate list head either. */ #define IOVA_MAG_SIZE 127 -#define MAX_GLOBAL_MAGS 32 /* magazines per bin */ + +#define IOVA_DEPOT_DELAY msecs_to_jiffies(100) struct iova_magazine { - unsigned long size; + union { + unsigned long size; + struct iova_magazine *next; + }; unsigned long pfns[IOVA_MAG_SIZE]; }; +static_assert(!(sizeof(struct iova_magazine) & (sizeof(struct iova_magazine) - 1))); struct iova_cpu_rcache { spinlock_t lock; @@ -640,9 +647,11 @@ struct iova_cpu_rcache { struct iova_rcache { spinlock_t lock; - unsigned long depot_size; - struct iova_magazine *depot[MAX_GLOBAL_MAGS]; + unsigned int depot_size; + struct iova_magazine *depot; struct iova_cpu_rcache __percpu *cpu_rcaches; + struct iova_domain *iovad; + struct delayed_work work; }; static struct iova_magazine *iova_magazine_alloc(gfp_t flags) @@ -717,6 +726,41 @@ static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn) mag->pfns[mag->size++] = pfn; } +static struct iova_magazine *iova_depot_pop(struct iova_rcache *rcache) +{ + struct iova_magazine *mag = rcache->depot; + + rcache->depot = mag->next; + mag->size = IOVA_MAG_SIZE; + rcache->depot_size--; + return mag; +} + +static void iova_depot_push(struct iova_rcache *rcache, struct iova_magazine *mag) +{ + mag->next = rcache->depot; + rcache->depot = mag; + rcache->depot_size++; +} + +static void iova_depot_work_func(struct work_struct *work) +{ + struct iova_rcache *rcache = container_of(work, typeof(*rcache), work.work); + struct iova_magazine *mag = NULL; + unsigned long flags; + + spin_lock_irqsave(&rcache->lock, flags); + if (rcache->depot_size > num_online_cpus()) + mag = iova_depot_pop(rcache); + spin_unlock_irqrestore(&rcache->lock, flags); + + if (mag) { + iova_magazine_free_pfns(mag, rcache->iovad); + iova_magazine_free(mag); + schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY); + } +} + int iova_domain_init_rcaches(struct iova_domain *iovad) { unsigned int cpu; @@ -734,7 +778,8 @@ int iova_domain_init_rcaches(struct iova_domain *iovad) rcache = &iovad->rcaches[i]; spin_lock_init(&rcache->lock); - rcache->depot_size = 0; + rcache->iovad = iovad; + INIT_DELAYED_WORK(&rcache->work, iova_depot_work_func); rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size()); if (!rcache->cpu_rcaches) { @@ -776,7 +821,6 @@ static bool 
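The depot rework above stores full magazines on an intrusive singly-linked list instead of a fixed array, reusing the magazine's own 'size' word for the link: a magazine parked in the depot is always full, so 'size' carries no information there and the union costs nothing. Push/pop sketch (locking elided; the real code holds rcache->lock, and a delayed work item trims the depot back towards num_online_cpus() entries):

    #define MAG_SIZE 127    /* IOVA_MAG_SIZE in the patch */

    struct mag {
            union {
                    unsigned long size;    /* meaningful while cached per-CPU */
                    struct mag *next;      /* meaningful while in the depot */
            };
            unsigned long pfns[MAG_SIZE];
    };

    static struct mag *depot;    /* rcache->depot in the patch */

    static void depot_push(struct mag *m)
    {
            m->next = depot;    /* clobbers m->size, which is known to be full */
            depot = m;
    }

    static struct mag *depot_pop(void)    /* caller checks depot != NULL */
    {
            struct mag *m = depot;

            depot = m->next;
            m->size = MAG_SIZE;    /* the depot only ever holds full magazines */
            return m;
    }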
__iova_rcache_insert(struct iova_domain *iovad, struct iova_rcache *rcache, unsigned long iova_pfn) { - struct iova_magazine *mag_to_free = NULL; struct iova_cpu_rcache *cpu_rcache; bool can_insert = false; unsigned long flags; @@ -794,13 +838,9 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, if (new_mag) { spin_lock(&rcache->lock); - if (rcache->depot_size < MAX_GLOBAL_MAGS) { - rcache->depot[rcache->depot_size++] = - cpu_rcache->loaded; - } else { - mag_to_free = cpu_rcache->loaded; - } + iova_depot_push(rcache, cpu_rcache->loaded); spin_unlock(&rcache->lock); + schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY); cpu_rcache->loaded = new_mag; can_insert = true; @@ -812,11 +852,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, spin_unlock_irqrestore(&cpu_rcache->lock, flags); - if (mag_to_free) { - iova_magazine_free_pfns(mag_to_free, iovad); - iova_magazine_free(mag_to_free); - } - return can_insert; } @@ -854,9 +889,9 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, has_pfn = true; } else { spin_lock(&rcache->lock); - if (rcache->depot_size > 0) { + if (rcache->depot) { iova_magazine_free(cpu_rcache->loaded); - cpu_rcache->loaded = rcache->depot[--rcache->depot_size]; + cpu_rcache->loaded = iova_depot_pop(rcache); has_pfn = true; } spin_unlock(&rcache->lock); @@ -895,9 +930,8 @@ static void free_iova_rcaches(struct iova_domain *iovad) struct iova_rcache *rcache; struct iova_cpu_rcache *cpu_rcache; unsigned int cpu; - int i, j; - for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { + for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { rcache = &iovad->rcaches[i]; if (!rcache->cpu_rcaches) break; @@ -907,8 +941,9 @@ static void free_iova_rcaches(struct iova_domain *iovad) iova_magazine_free(cpu_rcache->prev); } free_percpu(rcache->cpu_rcaches); - for (j = 0; j < rcache->depot_size; ++j) - iova_magazine_free(rcache->depot[j]); + cancel_delayed_work_sync(&rcache->work); + while (rcache->depot) + iova_magazine_free(iova_depot_pop(rcache)); } kfree(iovad->rcaches); @@ -942,16 +977,16 @@ static void free_global_cached_iovas(struct iova_domain *iovad) { struct iova_rcache *rcache; unsigned long flags; - int i, j; - for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { + for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { rcache = &iovad->rcaches[i]; spin_lock_irqsave(&rcache->lock, flags); - for (j = 0; j < rcache->depot_size; ++j) { - iova_magazine_free_pfns(rcache->depot[j], iovad); - iova_magazine_free(rcache->depot[j]); + while (rcache->depot) { + struct iova_magazine *mag = iova_depot_pop(rcache); + + iova_magazine_free_pfns(mag, iovad); + iova_magazine_free(mag); } - rcache->depot_size = 0; spin_unlock_irqrestore(&rcache->lock, flags); } } diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 65ff69477c43..ace1fc4bd34b 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -64,7 +64,6 @@ struct ipmmu_vmsa_device { struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX]; s8 utlb_ctx[IPMMU_UTLB_MAX]; - struct iommu_group *group; struct dma_iommu_mapping *mapping; }; @@ -295,6 +294,18 @@ static void ipmmu_utlb_enable(struct ipmmu_vmsa_domain *domain, mmu->utlb_ctx[utlb] = domain->context_id; } +/* + * Disable MMU translation for the microTLB. 
+ */ +static void ipmmu_utlb_disable(struct ipmmu_vmsa_domain *domain, + unsigned int utlb) +{ + struct ipmmu_vmsa_device *mmu = domain->mmu; + + ipmmu_imuctr_write(mmu, utlb, 0); + mmu->utlb_ctx[utlb] = IPMMU_CTX_INVALID; +} + static void ipmmu_tlb_flush_all(void *cookie) { struct ipmmu_vmsa_domain *domain = cookie; @@ -551,13 +562,10 @@ static irqreturn_t ipmmu_irq(int irq, void *dev) * IOMMU Operations */ -static struct iommu_domain *ipmmu_domain_alloc(unsigned type) +static struct iommu_domain *ipmmu_domain_alloc_paging(struct device *dev) { struct ipmmu_vmsa_domain *domain; - if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) - return NULL; - domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) return NULL; @@ -627,6 +635,36 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, return 0; } +static int ipmmu_iommu_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) +{ + struct iommu_domain *io_domain = iommu_get_domain_for_dev(dev); + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + struct ipmmu_vmsa_domain *domain; + unsigned int i; + + if (io_domain == identity_domain || !io_domain) + return 0; + + domain = to_vmsa_domain(io_domain); + for (i = 0; i < fwspec->num_ids; ++i) + ipmmu_utlb_disable(domain, fwspec->ids[i]); + + /* + * TODO: Optimize by disabling the context when no device is attached. + */ + return 0; +} + +static struct iommu_domain_ops ipmmu_iommu_identity_ops = { + .attach_dev = ipmmu_iommu_identity_attach, +}; + +static struct iommu_domain ipmmu_iommu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &ipmmu_iommu_identity_ops, +}; + static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -833,28 +871,18 @@ static void ipmmu_release_device(struct device *dev) arm_iommu_release_mapping(mmu->mapping); } -static struct iommu_group *ipmmu_find_group(struct device *dev) -{ - struct ipmmu_vmsa_device *mmu = to_ipmmu(dev); - struct iommu_group *group; - - if (mmu->group) - return iommu_group_ref_get(mmu->group); - - group = iommu_group_alloc(); - if (!IS_ERR(group)) - mmu->group = group; - - return group; -} - static const struct iommu_ops ipmmu_ops = { - .domain_alloc = ipmmu_domain_alloc, + .identity_domain = &ipmmu_iommu_identity_domain, + .domain_alloc_paging = ipmmu_domain_alloc_paging, .probe_device = ipmmu_probe_device, .release_device = ipmmu_release_device, .probe_finalize = ipmmu_probe_finalize, + /* + * FIXME: The device grouping is a fixed property of the hardware's + * ability to isolate and control DMA, it should not depend on kconfig. + */ .device_group = IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA) - ? generic_device_group : ipmmu_find_group, + ? 
generic_device_group : generic_single_device_group, .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, .of_xlate = ipmmu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 79d89bad5132..f86af9815d6f 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -302,13 +302,10 @@ static void __program_context(void __iomem *base, int ctx, SET_M(base, ctx, 1); } -static struct iommu_domain *msm_iommu_domain_alloc(unsigned type) +static struct iommu_domain *msm_iommu_domain_alloc_paging(struct device *dev) { struct msm_priv *priv; - if (type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) goto fail_nomem; @@ -443,15 +440,20 @@ fail: return ret; } -static void msm_iommu_set_platform_dma(struct device *dev) +static int msm_iommu_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct msm_priv *priv = to_msm_priv(domain); + struct msm_priv *priv; unsigned long flags; struct msm_iommu_dev *iommu; struct msm_iommu_ctx_dev *master; - int ret; + int ret = 0; + + if (domain == identity_domain || !domain) + return 0; + priv = to_msm_priv(domain); free_io_pgtable_ops(priv->iop); spin_lock_irqsave(&msm_iommu_lock, flags); @@ -468,8 +470,18 @@ static void msm_iommu_set_platform_dma(struct device *dev) } fail: spin_unlock_irqrestore(&msm_iommu_lock, flags); + return ret; } +static struct iommu_domain_ops msm_iommu_identity_ops = { + .attach_dev = msm_iommu_identity_attach, +}; + +static struct iommu_domain msm_iommu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &msm_iommu_identity_ops, +}; + static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t pa, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -486,12 +498,13 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova, return ret; } -static void msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, - size_t size) +static int msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, + size_t size) { struct msm_priv *priv = to_msm_priv(domain); __flush_iotlb_range(iova, size, SZ_4K, false, priv); + return 0; } static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova, @@ -675,10 +688,10 @@ fail: } static struct iommu_ops msm_iommu_ops = { - .domain_alloc = msm_iommu_domain_alloc, + .identity_domain = &msm_iommu_identity_domain, + .domain_alloc_paging = msm_iommu_domain_alloc_paging, .probe_device = msm_iommu_probe_device, .device_group = generic_device_group, - .set_platform_dma_ops = msm_iommu_set_platform_dma, .pgsize_bitmap = MSM_IOMMU_PGSIZES, .of_xlate = qcom_iommu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index fab6c347ce57..75279500a4a8 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -688,13 +688,10 @@ update_iova_region: return 0; } -static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type) +static struct iommu_domain *mtk_iommu_domain_alloc_paging(struct device *dev) { struct mtk_iommu_domain *dom; - if (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - dom = kzalloc(sizeof(*dom), GFP_KERNEL); if (!dom) return NULL; @@ -776,6 +773,28 @@ err_unlock: return ret; } +static int mtk_iommu_identity_attach(struct iommu_domain *identity_domain, + 
struct device *dev) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct mtk_iommu_data *data = dev_iommu_priv_get(dev); + + if (domain == identity_domain || !domain) + return 0; + + mtk_iommu_config(data, dev, false, 0); + return 0; +} + +static struct iommu_domain_ops mtk_iommu_identity_ops = { + .attach_dev = mtk_iommu_identity_attach, +}; + +static struct iommu_domain mtk_iommu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &mtk_iommu_identity_ops, +}; + static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -817,12 +836,13 @@ static void mtk_iommu_iotlb_sync(struct iommu_domain *domain, mtk_iommu_tlb_flush_range_sync(gather->start, length, dom->bank); } -static void mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, - size_t size) +static int mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, + size_t size) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); mtk_iommu_tlb_flush_range_sync(iova, size, dom->bank); + return 0; } static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, @@ -995,7 +1015,8 @@ static void mtk_iommu_get_resv_regions(struct device *dev, } static const struct iommu_ops mtk_iommu_ops = { - .domain_alloc = mtk_iommu_domain_alloc, + .identity_domain = &mtk_iommu_identity_domain, + .domain_alloc_paging = mtk_iommu_domain_alloc_paging, .probe_device = mtk_iommu_probe_device, .release_device = mtk_iommu_release_device, .device_group = mtk_iommu_device_group, diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 8a0a5e5d049f..67e044c1a7d9 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -270,13 +270,10 @@ static int mtk_iommu_v1_domain_finalise(struct mtk_iommu_v1_data *data) return 0; } -static struct iommu_domain *mtk_iommu_v1_domain_alloc(unsigned type) +static struct iommu_domain *mtk_iommu_v1_domain_alloc_paging(struct device *dev) { struct mtk_iommu_v1_domain *dom; - if (type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - dom = kzalloc(sizeof(*dom), GFP_KERNEL); if (!dom) return NULL; @@ -319,13 +316,24 @@ static int mtk_iommu_v1_attach_device(struct iommu_domain *domain, struct device return 0; } -static void mtk_iommu_v1_set_platform_dma(struct device *dev) +static int mtk_iommu_v1_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) { struct mtk_iommu_v1_data *data = dev_iommu_priv_get(dev); mtk_iommu_v1_config(data, dev, false); + return 0; } +static struct iommu_domain_ops mtk_iommu_v1_identity_ops = { + .attach_dev = mtk_iommu_v1_identity_attach, +}; + +static struct iommu_domain mtk_iommu_v1_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &mtk_iommu_v1_identity_ops, +}; + static int mtk_iommu_v1_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -441,11 +449,6 @@ static int mtk_iommu_v1_create_mapping(struct device *dev, struct of_phandle_arg return 0; } -static int mtk_iommu_v1_def_domain_type(struct device *dev) -{ - return IOMMU_DOMAIN_UNMANAGED; -} - static struct iommu_device *mtk_iommu_v1_probe_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); @@ -578,14 +581,13 @@ static int mtk_iommu_v1_hw_init(const struct mtk_iommu_v1_data *data) } static const struct iommu_ops mtk_iommu_v1_ops = { - .domain_alloc = mtk_iommu_v1_domain_alloc, + 
.identity_domain = &mtk_iommu_v1_identity_domain, + .domain_alloc_paging = mtk_iommu_v1_domain_alloc_paging, .probe_device = mtk_iommu_v1_probe_device, .probe_finalize = mtk_iommu_v1_probe_finalize, .release_device = mtk_iommu_v1_release_device, - .def_domain_type = mtk_iommu_v1_def_domain_type, .device_group = generic_device_group, .pgsize_bitmap = MT2701_IOMMU_PAGE_SIZE, - .set_platform_dma_ops = mtk_iommu_v1_set_platform_dma, .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = mtk_iommu_v1_attach_device, diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 537e402f9bba..c66b070841dd 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1225,18 +1225,15 @@ static int omap_iommu_probe(struct platform_device *pdev) platform_set_drvdata(pdev, obj); if (omap_iommu_can_register(pdev)) { - obj->group = iommu_group_alloc(); - if (IS_ERR(obj->group)) - return PTR_ERR(obj->group); - err = iommu_device_sysfs_add(&obj->iommu, obj->dev, NULL, obj->name); if (err) - goto out_group; + return err; err = iommu_device_register(&obj->iommu, &omap_iommu_ops, &pdev->dev); if (err) goto out_sysfs; + obj->has_iommu_driver = true; } pm_runtime_enable(obj->dev); @@ -1252,8 +1249,6 @@ static int omap_iommu_probe(struct platform_device *pdev) out_sysfs: iommu_device_sysfs_remove(&obj->iommu); -out_group: - iommu_group_put(obj->group); return err; } @@ -1261,10 +1256,7 @@ static void omap_iommu_remove(struct platform_device *pdev) { struct omap_iommu *obj = platform_get_drvdata(pdev); - if (obj->group) { - iommu_group_put(obj->group); - obj->group = NULL; - + if (obj->has_iommu_driver) { iommu_device_sysfs_remove(&obj->iommu); iommu_device_unregister(&obj->iommu); } @@ -1318,7 +1310,8 @@ static u32 iotlb_init_entry(struct iotlb_entry *e, u32 da, u32 pa, int pgsz) } static int omap_iommu_map(struct iommu_domain *domain, unsigned long da, - phys_addr_t pa, size_t bytes, int prot, gfp_t gfp) + phys_addr_t pa, size_t bytes, size_t count, + int prot, gfp_t gfp, size_t *mapped) { struct omap_iommu_domain *omap_domain = to_omap_domain(domain); struct device *dev = omap_domain->dev; @@ -1356,13 +1349,15 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da, oiommu = iommu->iommu_dev; iopgtable_clear_entry(oiommu, da); } + } else { + *mapped = bytes; } return ret; } static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da, - size_t size, struct iommu_iotlb_gather *gather) + size_t size, size_t count, struct iommu_iotlb_gather *gather) { struct omap_iommu_domain *omap_domain = to_omap_domain(domain); struct device *dev = omap_domain->dev; @@ -1555,23 +1550,35 @@ static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain, omap_domain->dev = NULL; } -static void omap_iommu_set_platform_dma(struct device *dev) +static int omap_iommu_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct omap_iommu_domain *omap_domain = to_omap_domain(domain); + struct omap_iommu_domain *omap_domain; + + if (domain == identity_domain || !domain) + return 0; + omap_domain = to_omap_domain(domain); spin_lock(&omap_domain->lock); _omap_iommu_detach_dev(omap_domain, dev); spin_unlock(&omap_domain->lock); + return 0; } -static struct iommu_domain *omap_iommu_domain_alloc(unsigned type) +static struct iommu_domain_ops omap_iommu_identity_ops = { + .attach_dev = omap_iommu_identity_attach, +}; + +static struct 
iommu_domain omap_iommu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &omap_iommu_identity_ops, +}; + +static struct iommu_domain *omap_iommu_domain_alloc_paging(struct device *dev) { struct omap_iommu_domain *omap_domain; - if (type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - omap_domain = kzalloc(sizeof(*omap_domain), GFP_KERNEL); if (!omap_domain) return NULL; @@ -1717,31 +1724,17 @@ static void omap_iommu_release_device(struct device *dev) } -static struct iommu_group *omap_iommu_device_group(struct device *dev) -{ - struct omap_iommu_arch_data *arch_data = dev_iommu_priv_get(dev); - struct iommu_group *group = ERR_PTR(-EINVAL); - - if (!arch_data) - return ERR_PTR(-ENODEV); - - if (arch_data->iommu_dev) - group = iommu_group_ref_get(arch_data->iommu_dev->group); - - return group; -} - static const struct iommu_ops omap_iommu_ops = { - .domain_alloc = omap_iommu_domain_alloc, + .identity_domain = &omap_iommu_identity_domain, + .domain_alloc_paging = omap_iommu_domain_alloc_paging, .probe_device = omap_iommu_probe_device, .release_device = omap_iommu_release_device, - .device_group = omap_iommu_device_group, - .set_platform_dma_ops = omap_iommu_set_platform_dma, + .device_group = generic_single_device_group, .pgsize_bitmap = OMAP_IOMMU_PGSIZES, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = omap_iommu_attach_dev, - .map = omap_iommu_map, - .unmap = omap_iommu_unmap, + .map_pages = omap_iommu_map, + .unmap_pages = omap_iommu_unmap, .iova_to_phys = omap_iommu_iova_to_phys, .free = omap_iommu_domain_free, } diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index 18ee713ede78..27697109ec79 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -80,7 +80,7 @@ struct omap_iommu { u32 id; struct iommu_device iommu; - struct iommu_group *group; + bool has_iommu_driver; u8 pwrst; }; diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 8ff69fbf9f65..2685861c0a12 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -113,7 +113,6 @@ struct rk_iommu { struct iommu_device iommu; struct list_head node; /* entry in rk_iommu_domain.iommus */ struct iommu_domain *domain; /* domain to which iommu is attached */ - struct iommu_group *group; }; struct rk_iommudata { @@ -817,7 +816,8 @@ unwind: } static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) + phys_addr_t paddr, size_t size, size_t count, + int prot, gfp_t gfp, size_t *mapped) { struct rk_iommu_domain *rk_domain = to_rk_domain(domain); unsigned long flags; @@ -850,12 +850,14 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova, paddr, size, prot); spin_unlock_irqrestore(&rk_domain->dt_lock, flags); + if (!ret) + *mapped = size; return ret; } static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova, - size_t size, struct iommu_iotlb_gather *gather) + size_t size, size_t count, struct iommu_iotlb_gather *gather) { struct rk_iommu_domain *rk_domain = to_rk_domain(domain); unsigned long flags; @@ -989,13 +991,8 @@ static int rk_iommu_identity_attach(struct iommu_domain *identity_domain, return 0; } -static void rk_iommu_identity_free(struct iommu_domain *domain) -{ -} - static struct iommu_domain_ops rk_identity_ops = { .attach_dev = rk_iommu_identity_attach, - .free = rk_iommu_identity_free, }; static struct iommu_domain rk_identity_domain = { @@ -1003,13 +1000,6 @@ static struct iommu_domain 
rk_identity_domain = { .ops = &rk_identity_ops, }; -#ifdef CONFIG_ARM -static void rk_iommu_set_platform_dma(struct device *dev) -{ - WARN_ON(rk_iommu_identity_attach(&rk_identity_domain, dev)); -} -#endif - static int rk_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { @@ -1055,16 +1045,10 @@ static int rk_iommu_attach_device(struct iommu_domain *domain, return ret; } -static struct iommu_domain *rk_iommu_domain_alloc(unsigned type) +static struct iommu_domain *rk_iommu_domain_alloc_paging(struct device *dev) { struct rk_iommu_domain *rk_domain; - if (type == IOMMU_DOMAIN_IDENTITY) - return &rk_identity_domain; - - if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) - return NULL; - if (!dma_dev) return NULL; @@ -1155,15 +1139,6 @@ static void rk_iommu_release_device(struct device *dev) device_link_del(data->link); } -static struct iommu_group *rk_iommu_device_group(struct device *dev) -{ - struct rk_iommu *iommu; - - iommu = rk_iommu_from_dev(dev); - - return iommu_group_ref_get(iommu->group); -} - static int rk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) { @@ -1186,19 +1161,17 @@ static int rk_iommu_of_xlate(struct device *dev, } static const struct iommu_ops rk_iommu_ops = { - .domain_alloc = rk_iommu_domain_alloc, + .identity_domain = &rk_identity_domain, + .domain_alloc_paging = rk_iommu_domain_alloc_paging, .probe_device = rk_iommu_probe_device, .release_device = rk_iommu_release_device, - .device_group = rk_iommu_device_group, -#ifdef CONFIG_ARM - .set_platform_dma_ops = rk_iommu_set_platform_dma, -#endif + .device_group = generic_single_device_group, .pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP, .of_xlate = rk_iommu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = rk_iommu_attach_device, - .map = rk_iommu_map, - .unmap = rk_iommu_unmap, + .map_pages = rk_iommu_map, + .unmap_pages = rk_iommu_unmap, .iova_to_phys = rk_iommu_iova_to_phys, .free = rk_iommu_domain_free, } @@ -1280,15 +1253,9 @@ static int rk_iommu_probe(struct platform_device *pdev) if (err) return err; - iommu->group = iommu_group_alloc(); - if (IS_ERR(iommu->group)) { - err = PTR_ERR(iommu->group); - goto err_unprepare_clocks; - } - err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev)); if (err) - goto err_put_group; + goto err_unprepare_clocks; err = iommu_device_register(&iommu->iommu, &rk_iommu_ops, dev); if (err) @@ -1325,8 +1292,6 @@ err_pm_disable: pm_runtime_disable(dev); err_remove_sysfs: iommu_device_sysfs_remove(&iommu->iommu); -err_put_group: - iommu_group_put(iommu->group); err_unprepare_clocks: clk_bulk_unprepare(iommu->num_clocks, iommu->clocks); return err; diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index fbf59a8db29b..9a5196f523de 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -14,16 +14,300 @@ #include <linux/rcupdate.h> #include <asm/pci_dma.h> +#include "dma-iommu.h" + static const struct iommu_ops s390_iommu_ops; +static struct kmem_cache *dma_region_table_cache; +static struct kmem_cache *dma_page_table_cache; + +static u64 s390_iommu_aperture; +static u32 s390_iommu_aperture_factor = 1; + struct s390_domain { struct iommu_domain domain; struct list_head devices; + struct zpci_iommu_ctrs ctrs; unsigned long *dma_table; spinlock_t list_lock; struct rcu_head rcu; }; +static inline unsigned int calc_rtx(dma_addr_t ptr) +{ + return ((unsigned long)ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK; +} + +static inline unsigned int calc_sx(dma_addr_t ptr) +{ + 
return ((unsigned long)ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK; +} + +static inline unsigned int calc_px(dma_addr_t ptr) +{ + return ((unsigned long)ptr >> PAGE_SHIFT) & ZPCI_PT_MASK; +} + +static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa) +{ + *entry &= ZPCI_PTE_FLAG_MASK; + *entry |= (pfaa & ZPCI_PTE_ADDR_MASK); +} + +static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto) +{ + *entry &= ZPCI_RTE_FLAG_MASK; + *entry |= (sto & ZPCI_RTE_ADDR_MASK); + *entry |= ZPCI_TABLE_TYPE_RTX; +} + +static inline void set_st_pto(unsigned long *entry, phys_addr_t pto) +{ + *entry &= ZPCI_STE_FLAG_MASK; + *entry |= (pto & ZPCI_STE_ADDR_MASK); + *entry |= ZPCI_TABLE_TYPE_SX; +} + +static inline void validate_rt_entry(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_VALID_MASK; + *entry &= ~ZPCI_TABLE_OFFSET_MASK; + *entry |= ZPCI_TABLE_VALID; + *entry |= ZPCI_TABLE_LEN_RTX; +} + +static inline void validate_st_entry(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_VALID_MASK; + *entry |= ZPCI_TABLE_VALID; +} + +static inline void invalidate_pt_entry(unsigned long *entry) +{ + WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID); + *entry &= ~ZPCI_PTE_VALID_MASK; + *entry |= ZPCI_PTE_INVALID; +} + +static inline void validate_pt_entry(unsigned long *entry) +{ + WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID); + *entry &= ~ZPCI_PTE_VALID_MASK; + *entry |= ZPCI_PTE_VALID; +} + +static inline void entry_set_protected(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_PROT_MASK; + *entry |= ZPCI_TABLE_PROTECTED; +} + +static inline void entry_clr_protected(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_PROT_MASK; + *entry |= ZPCI_TABLE_UNPROTECTED; +} + +static inline int reg_entry_isvalid(unsigned long entry) +{ + return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID; +} + +static inline int pt_entry_isvalid(unsigned long entry) +{ + return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID; +} + +static inline unsigned long *get_rt_sto(unsigned long entry) +{ + if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX) + return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK); + else + return NULL; +} + +static inline unsigned long *get_st_pto(unsigned long entry) +{ + if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX) + return phys_to_virt(entry & ZPCI_STE_ADDR_MASK); + else + return NULL; +} + +static int __init dma_alloc_cpu_table_caches(void) +{ + dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables", + ZPCI_TABLE_SIZE, + ZPCI_TABLE_ALIGN, + 0, NULL); + if (!dma_region_table_cache) + return -ENOMEM; + + dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables", + ZPCI_PT_SIZE, + ZPCI_PT_ALIGN, + 0, NULL); + if (!dma_page_table_cache) { + kmem_cache_destroy(dma_region_table_cache); + return -ENOMEM; + } + return 0; +} + +static unsigned long *dma_alloc_cpu_table(gfp_t gfp) +{ + unsigned long *table, *entry; + + table = kmem_cache_alloc(dma_region_table_cache, gfp); + if (!table) + return NULL; + + for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++) + *entry = ZPCI_TABLE_INVALID; + return table; +} + +static void dma_free_cpu_table(void *table) +{ + kmem_cache_free(dma_region_table_cache, table); +} + +static void dma_free_page_table(void *table) +{ + kmem_cache_free(dma_page_table_cache, table); +} + +static void dma_free_seg_table(unsigned long entry) +{ + unsigned long *sto = get_rt_sto(entry); + int sx; + + for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++) + if (reg_entry_isvalid(sto[sx])) + 
dma_free_page_table(get_st_pto(sto[sx])); + + dma_free_cpu_table(sto); +} + +static void dma_cleanup_tables(unsigned long *table) +{ + int rtx; + + if (!table) + return; + + for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++) + if (reg_entry_isvalid(table[rtx])) + dma_free_seg_table(table[rtx]); + + dma_free_cpu_table(table); +} + +static unsigned long *dma_alloc_page_table(gfp_t gfp) +{ + unsigned long *table, *entry; + + table = kmem_cache_alloc(dma_page_table_cache, gfp); + if (!table) + return NULL; + + for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++) + *entry = ZPCI_PTE_INVALID; + return table; +} + +static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp) +{ + unsigned long old_rte, rte; + unsigned long *sto; + + rte = READ_ONCE(*rtep); + if (reg_entry_isvalid(rte)) { + sto = get_rt_sto(rte); + } else { + sto = dma_alloc_cpu_table(gfp); + if (!sto) + return NULL; + + set_rt_sto(&rte, virt_to_phys(sto)); + validate_rt_entry(&rte); + entry_clr_protected(&rte); + + old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte); + if (old_rte != ZPCI_TABLE_INVALID) { + /* Someone else was faster, use theirs */ + dma_free_cpu_table(sto); + sto = get_rt_sto(old_rte); + } + } + return sto; +} + +static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp) +{ + unsigned long old_ste, ste; + unsigned long *pto; + + ste = READ_ONCE(*step); + if (reg_entry_isvalid(ste)) { + pto = get_st_pto(ste); + } else { + pto = dma_alloc_page_table(gfp); + if (!pto) + return NULL; + set_st_pto(&ste, virt_to_phys(pto)); + validate_st_entry(&ste); + entry_clr_protected(&ste); + + old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste); + if (old_ste != ZPCI_TABLE_INVALID) { + /* Someone else was faster, use theirs */ + dma_free_page_table(pto); + pto = get_st_pto(old_ste); + } + } + return pto; +} + +static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, gfp_t gfp) +{ + unsigned long *sto, *pto; + unsigned int rtx, sx, px; + + rtx = calc_rtx(dma_addr); + sto = dma_get_seg_table_origin(&rto[rtx], gfp); + if (!sto) + return NULL; + + sx = calc_sx(dma_addr); + pto = dma_get_page_table_origin(&sto[sx], gfp); + if (!pto) + return NULL; + + px = calc_px(dma_addr); + return &pto[px]; +} + +static void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags) +{ + unsigned long pte; + + pte = READ_ONCE(*ptep); + if (flags & ZPCI_PTE_INVALID) { + invalidate_pt_entry(&pte); + } else { + set_pt_pfaa(&pte, page_addr); + validate_pt_entry(&pte); + } + + if (flags & ZPCI_TABLE_PROTECTED) + entry_set_protected(&pte); + else + entry_clr_protected(&pte); + + xchg(ptep, pte); +} + static struct s390_domain *to_s390_domain(struct iommu_domain *dom) { return container_of(dom, struct s390_domain, domain); @@ -31,21 +315,22 @@ static struct s390_domain *to_s390_domain(struct iommu_domain *dom) static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap) { + struct zpci_dev *zdev = to_zpci_dev(dev); + switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: return true; + case IOMMU_CAP_DEFERRED_FLUSH: + return zdev->pft != PCI_FUNC_TYPE_ISM; default: return false; } } -static struct iommu_domain *s390_domain_alloc(unsigned domain_type) +static struct iommu_domain *s390_domain_alloc_paging(struct device *dev) { struct s390_domain *s390_domain; - if (domain_type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL); if (!s390_domain) return NULL; @@ -84,14 +369,13 @@ static void s390_domain_free(struct iommu_domain
*domain) call_rcu(&s390_domain->rcu, s390_iommu_rcu_free_domain); } -static void __s390_iommu_detach_device(struct zpci_dev *zdev) +static void s390_iommu_detach_device(struct iommu_domain *domain, + struct device *dev) { - struct s390_domain *s390_domain = zdev->s390_domain; + struct s390_domain *s390_domain = to_s390_domain(domain); + struct zpci_dev *zdev = to_zpci_dev(dev); unsigned long flags; - if (!s390_domain) - return; - spin_lock_irqsave(&s390_domain->list_lock, flags); list_del_rcu(&zdev->iommu_list); spin_unlock_irqrestore(&s390_domain->list_lock, flags); @@ -118,9 +402,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, return -EINVAL; if (zdev->s390_domain) - __s390_iommu_detach_device(zdev); - else if (zdev->dma_table) - zpci_dma_exit_device(zdev); + s390_iommu_detach_device(&zdev->s390_domain->domain, dev); cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, virt_to_phys(s390_domain->dma_table), &status); @@ -130,7 +412,6 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, */ if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL) return -EIO; - zdev->dma_table = s390_domain->dma_table; zdev->dma_table = s390_domain->dma_table; zdev->s390_domain = s390_domain; @@ -142,14 +423,6 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, return 0; } -static void s390_iommu_set_platform_dma(struct device *dev) -{ - struct zpci_dev *zdev = to_zpci_dev(dev); - - __s390_iommu_detach_device(zdev); - zpci_dma_init_device(zdev); -} - static void s390_iommu_get_resv_regions(struct device *dev, struct list_head *list) { @@ -190,6 +463,9 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev) if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1) zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1; + if (zdev->tlb_refresh) + dev->iommu->shadow_on_flush = 1; + return &zdev->iommu_dev; } @@ -202,7 +478,13 @@ static void s390_iommu_release_device(struct device *dev) * to the device, but keep it attached to other devices in the group. 
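The dma_get_seg_table_origin() and dma_get_page_table_origin() helpers above grow missing table levels without taking a lock: allocate speculatively, publish with cmpxchg(), and throw the allocation away if another CPU won the race. The pattern in isolation, using hypothetical encode/decode helpers rather than the real ZPCI entry format:

static unsigned long *install_level(unsigned long *slot, gfp_t gfp)
{
	unsigned long old, new_entry;
	unsigned long *tbl;

	old = READ_ONCE(*slot);
	if (entry_is_valid(old))
		return entry_to_table(old);

	tbl = alloc_level(gfp);
	if (!tbl)
		return NULL;
	new_entry = table_to_entry(tbl);

	old = cmpxchg(slot, ENTRY_INVALID, new_entry);
	if (old != ENTRY_INVALID) {
		/* someone else was faster, use theirs */
		free_level(tbl);
		return entry_to_table(old);
	}
	return tbl;
}

Losing the race costs one allocation and free, but the map path never serializes on a page-table lock.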
*/ if (zdev) - __s390_iommu_detach_device(zdev); + s390_iommu_detach_device(&zdev->s390_domain->domain, dev); +} + +static int zpci_refresh_all(struct zpci_dev *zdev) +{ + return zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma, + zdev->end_dma - zdev->start_dma + 1); } static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain) @@ -212,8 +494,8 @@ static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain) rcu_read_lock(); list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) { - zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma, - zdev->end_dma - zdev->start_dma + 1); + atomic64_inc(&s390_domain->ctrs.global_rpcits); + zpci_refresh_all(zdev); } rcu_read_unlock(); } @@ -231,26 +513,40 @@ static void s390_iommu_iotlb_sync(struct iommu_domain *domain, rcu_read_lock(); list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) { + atomic64_inc(&s390_domain->ctrs.sync_rpcits); zpci_refresh_trans((u64)zdev->fh << 32, gather->start, size); } rcu_read_unlock(); } -static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain, - unsigned long iova, size_t size) +static int s390_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) { struct s390_domain *s390_domain = to_s390_domain(domain); struct zpci_dev *zdev; + int ret = 0; rcu_read_lock(); list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) { if (!zdev->tlb_refresh) continue; - zpci_refresh_trans((u64)zdev->fh << 32, - iova, size); + atomic64_inc(&s390_domain->ctrs.sync_map_rpcits); + ret = zpci_refresh_trans((u64)zdev->fh << 32, + iova, size); + /* + * let the hypervisor discover invalidated entries + * allowing it to free IOVAs and unpin pages + */ + if (ret == -ENOMEM) { + ret = zpci_refresh_all(zdev); + if (ret) + break; + } } rcu_read_unlock(); + + return ret; } static int s390_iommu_validate_trans(struct s390_domain *s390_domain, @@ -330,16 +626,15 @@ static int s390_iommu_map_pages(struct iommu_domain *domain, if (!IS_ALIGNED(iova | paddr, pgsize)) return -EINVAL; - if (!(prot & IOMMU_READ)) - return -EINVAL; - if (!(prot & IOMMU_WRITE)) flags |= ZPCI_TABLE_PROTECTED; rc = s390_iommu_validate_trans(s390_domain, paddr, iova, - pgcount, flags, gfp); - if (!rc) + pgcount, flags, gfp); + if (!rc) { *mapped = size; + atomic64_add(pgcount, &s390_domain->ctrs.mapped_pages); + } return rc; } @@ -395,12 +690,26 @@ static size_t s390_iommu_unmap_pages(struct iommu_domain *domain, return 0; iommu_iotlb_gather_add_range(gather, iova, size); + atomic64_add(pgcount, &s390_domain->ctrs.unmapped_pages); return size; } +static void s390_iommu_probe_finalize(struct device *dev) +{ + iommu_setup_dma_ops(dev, 0, U64_MAX); +} + +struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev) +{ + if (!zdev || !zdev->s390_domain) + return NULL; + return &zdev->s390_domain->ctrs; +} + int zpci_init_iommu(struct zpci_dev *zdev) { + u64 aperture_size; int rc = 0; rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL, @@ -412,6 +721,12 @@ int zpci_init_iommu(struct zpci_dev *zdev) if (rc) goto out_sysfs; + zdev->start_dma = PAGE_ALIGN(zdev->start_dma); + aperture_size = min3(s390_iommu_aperture, + ZPCI_TABLE_SIZE_RT - zdev->start_dma, + zdev->end_dma - zdev->start_dma + 1); + zdev->end_dma = zdev->start_dma + aperture_size - 1; + return 0; out_sysfs: @@ -427,13 +742,52 @@ void zpci_destroy_iommu(struct zpci_dev *zdev) iommu_device_sysfs_remove(&zdev->iommu_dev); } +static int __init s390_iommu_setup(char *str) +{ + if (!strcmp(str, "strict")) { + 
pr_warn("s390_iommu=strict deprecated; use iommu.strict=1 instead\n"); + iommu_set_dma_strict(); + } + return 1; +} + +__setup("s390_iommu=", s390_iommu_setup); + +static int __init s390_iommu_aperture_setup(char *str) +{ + if (kstrtou32(str, 10, &s390_iommu_aperture_factor)) + s390_iommu_aperture_factor = 1; + return 1; +} + +__setup("s390_iommu_aperture=", s390_iommu_aperture_setup); + +static int __init s390_iommu_init(void) +{ + int rc; + + iommu_dma_forcedac = true; + s390_iommu_aperture = (u64)virt_to_phys(high_memory); + if (!s390_iommu_aperture_factor) + s390_iommu_aperture = ULONG_MAX; + else + s390_iommu_aperture *= s390_iommu_aperture_factor; + + rc = dma_alloc_cpu_table_caches(); + if (rc) + return rc; + + return rc; +} +subsys_initcall(s390_iommu_init); + static const struct iommu_ops s390_iommu_ops = { .capable = s390_iommu_capable, - .domain_alloc = s390_domain_alloc, + .domain_alloc_paging = s390_domain_alloc_paging, .probe_device = s390_iommu_probe_device, + .probe_finalize = s390_iommu_probe_finalize, .release_device = s390_iommu_release_device, .device_group = generic_device_group, - .set_platform_dma_ops = s390_iommu_set_platform_dma, .pgsize_bitmap = SZ_4K, .get_resv_regions = s390_iommu_get_resv_regions, .default_domain_ops = &(const struct iommu_domain_ops) { diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index 2fa9afebd4f5..2eb9fb46703b 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -70,7 +70,6 @@ struct sprd_iommu_device { void __iomem *base; struct device *dev; struct iommu_device iommu; - struct iommu_group *group; struct clk *eb; }; @@ -134,13 +133,10 @@ sprd_iommu_pgt_size(struct iommu_domain *domain) SPRD_IOMMU_PAGE_SHIFT) * sizeof(u32); } -static struct iommu_domain *sprd_iommu_domain_alloc(unsigned int domain_type) +static struct iommu_domain *sprd_iommu_domain_alloc_paging(struct device *dev) { struct sprd_iommu_domain *dom; - if (domain_type != IOMMU_DOMAIN_DMA && domain_type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - dom = kzalloc(sizeof(*dom), GFP_KERNEL); if (!dom) return NULL; @@ -345,8 +341,8 @@ static size_t sprd_iommu_unmap(struct iommu_domain *domain, unsigned long iova, return size; } -static void sprd_iommu_sync_map(struct iommu_domain *domain, - unsigned long iova, size_t size) +static int sprd_iommu_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) { struct sprd_iommu_domain *dom = to_sprd_domain(domain); unsigned int reg; @@ -358,6 +354,7 @@ static void sprd_iommu_sync_map(struct iommu_domain *domain, /* clear IOMMU TLB buffer after page table updated */ sprd_iommu_write(dom->sdev, reg, 0xffffffff); + return 0; } static void sprd_iommu_sync(struct iommu_domain *domain, @@ -399,13 +396,6 @@ static struct iommu_device *sprd_iommu_probe_device(struct device *dev) return &sdev->iommu; } -static struct iommu_group *sprd_iommu_device_group(struct device *dev) -{ - struct sprd_iommu_device *sdev = dev_iommu_priv_get(dev); - - return iommu_group_ref_get(sdev->group); -} - static int sprd_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) { struct platform_device *pdev; @@ -421,9 +411,9 @@ static int sprd_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) static const struct iommu_ops sprd_iommu_ops = { - .domain_alloc = sprd_iommu_domain_alloc, + .domain_alloc_paging = sprd_iommu_domain_alloc_paging, .probe_device = sprd_iommu_probe_device, - .device_group = sprd_iommu_device_group, + .device_group = generic_single_device_group, .of_xlate = 
sprd_iommu_of_xlate, .pgsize_bitmap = SPRD_IOMMU_PAGE_SIZE, .owner = THIS_MODULE, @@ -496,16 +486,9 @@ static int sprd_iommu_probe(struct platform_device *pdev) platform_set_drvdata(pdev, sdev); sdev->dev = dev; - /* All the client devices are in the same iommu-group */ - sdev->group = iommu_group_alloc(); - if (IS_ERR(sdev->group)) { - ret = PTR_ERR(sdev->group); - goto free_page; - } - ret = iommu_device_sysfs_add(&sdev->iommu, dev, NULL, dev_name(dev)); if (ret) - goto put_group; + goto free_page; ret = iommu_device_register(&sdev->iommu, &sprd_iommu_ops, dev); if (ret) @@ -530,8 +513,6 @@ unregister_iommu: iommu_device_unregister(&sdev->iommu); remove_sysfs: iommu_device_sysfs_remove(&sdev->iommu); -put_group: - iommu_group_put(sdev->group); free_page: dma_free_coherent(sdev->dev, SPRD_IOMMU_PAGE_SIZE, sdev->prot_page_va, sdev->prot_page_pa); return ret; @@ -543,9 +524,6 @@ static void sprd_iommu_remove(struct platform_device *pdev) dma_free_coherent(sdev->dev, SPRD_IOMMU_PAGE_SIZE, sdev->prot_page_va, sdev->prot_page_pa); - iommu_group_put(sdev->group); - sdev->group = NULL; - platform_set_drvdata(pdev, NULL); iommu_device_sysfs_remove(&sdev->iommu); iommu_device_unregister(&sdev->iommu); diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index 74c5cb93e900..41484a5a399b 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -107,7 +107,6 @@ struct sun50i_iommu { struct clk *clk; struct iommu_domain *domain; - struct iommu_group *group; struct kmem_cache *pt_pool; }; @@ -402,8 +401,8 @@ static void sun50i_iommu_flush_iotlb_all(struct iommu_domain *domain) spin_unlock_irqrestore(&iommu->iommu_lock, flags); } -static void sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain, - unsigned long iova, size_t size) +static int sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) { struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); struct sun50i_iommu *iommu = sun50i_domain->iommu; @@ -412,6 +411,8 @@ static void sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain, spin_lock_irqsave(&iommu->iommu_lock, flags); sun50i_iommu_zap_range(iommu, iova, size); spin_unlock_irqrestore(&iommu->iommu_lock, flags); + + return 0; } static void sun50i_iommu_iotlb_sync(struct iommu_domain *domain, @@ -589,7 +590,8 @@ static u32 *sun50i_dte_get_page_table(struct sun50i_iommu_domain *sun50i_domain, } static int sun50i_iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) + phys_addr_t paddr, size_t size, size_t count, + int prot, gfp_t gfp, size_t *mapped) { struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); struct sun50i_iommu *iommu = sun50i_domain->iommu; @@ -616,13 +618,14 @@ static int sun50i_iommu_map(struct iommu_domain *domain, unsigned long iova, *pte_addr = sun50i_mk_pte(paddr, prot); sun50i_table_flush(sun50i_domain, pte_addr, 1); + *mapped = size; out: return ret; } static size_t sun50i_iommu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *gather) + size_t size, size_t count, struct iommu_iotlb_gather *gather) { struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); phys_addr_t pt_phys; @@ -667,14 +670,11 @@ static phys_addr_t sun50i_iommu_iova_to_phys(struct iommu_domain *domain, sun50i_iova_get_page_offset(iova); } -static struct iommu_domain *sun50i_iommu_domain_alloc(unsigned type) +static struct iommu_domain * 
+sun50i_iommu_domain_alloc_paging(struct device *dev) { struct sun50i_iommu_domain *sun50i_domain; - if (type != IOMMU_DOMAIN_DMA && - type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - sun50i_domain = kzalloc(sizeof(*sun50i_domain), GFP_KERNEL); if (!sun50i_domain) return NULL; @@ -757,21 +757,32 @@ static void sun50i_iommu_detach_domain(struct sun50i_iommu *iommu, iommu->domain = NULL; } -static void sun50i_iommu_detach_device(struct iommu_domain *domain, - struct device *dev) +static int sun50i_iommu_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) { - struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); struct sun50i_iommu *iommu = dev_iommu_priv_get(dev); + struct sun50i_iommu_domain *sun50i_domain; dev_dbg(dev, "Detaching from IOMMU domain\n"); - if (iommu->domain != domain) - return; + if (iommu->domain == identity_domain) + return 0; + sun50i_domain = to_sun50i_domain(iommu->domain); if (refcount_dec_and_test(&sun50i_domain->refcnt)) sun50i_iommu_detach_domain(iommu, sun50i_domain); + return 0; } +static struct iommu_domain_ops sun50i_iommu_identity_ops = { + .attach_dev = sun50i_iommu_identity_attach, +}; + +static struct iommu_domain sun50i_iommu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &sun50i_iommu_identity_ops, +}; + static int sun50i_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { @@ -789,8 +800,7 @@ static int sun50i_iommu_attach_device(struct iommu_domain *domain, if (iommu->domain == domain) return 0; - if (iommu->domain) - sun50i_iommu_detach_device(iommu->domain, dev); + sun50i_iommu_identity_attach(&sun50i_iommu_identity_domain, dev); sun50i_iommu_attach_domain(iommu, sun50i_domain); @@ -808,13 +818,6 @@ static struct iommu_device *sun50i_iommu_probe_device(struct device *dev) return &iommu->iommu; } -static struct iommu_group *sun50i_iommu_device_group(struct device *dev) -{ - struct sun50i_iommu *iommu = sun50i_iommu_from_dev(dev); - - return iommu_group_ref_get(iommu->group); -} - static int sun50i_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) { @@ -827,9 +830,10 @@ static int sun50i_iommu_of_xlate(struct device *dev, } static const struct iommu_ops sun50i_iommu_ops = { + .identity_domain = &sun50i_iommu_identity_domain, .pgsize_bitmap = SZ_4K, - .device_group = sun50i_iommu_device_group, - .domain_alloc = sun50i_iommu_domain_alloc, + .device_group = generic_single_device_group, + .domain_alloc_paging = sun50i_iommu_domain_alloc_paging, .of_xlate = sun50i_iommu_of_xlate, .probe_device = sun50i_iommu_probe_device, .default_domain_ops = &(const struct iommu_domain_ops) { @@ -838,8 +842,8 @@ static const struct iommu_ops sun50i_iommu_ops = { .iotlb_sync_map = sun50i_iommu_iotlb_sync_map, .iotlb_sync = sun50i_iommu_iotlb_sync, .iova_to_phys = sun50i_iommu_iova_to_phys, - .map = sun50i_iommu_map, - .unmap = sun50i_iommu_unmap, + .map_pages = sun50i_iommu_map, + .unmap_pages = sun50i_iommu_unmap, .free = sun50i_iommu_domain_free, } }; @@ -985,6 +989,7 @@ static int sun50i_iommu_probe(struct platform_device *pdev) if (!iommu) return -ENOMEM; spin_lock_init(&iommu->iommu_lock); + iommu->domain = &sun50i_iommu_identity_domain; platform_set_drvdata(pdev, iommu); iommu->dev = &pdev->dev; @@ -995,42 +1000,36 @@ static int sun50i_iommu_probe(struct platform_device *pdev) if (!iommu->pt_pool) return -ENOMEM; - iommu->group = iommu_group_alloc(); - if (IS_ERR(iommu->group)) { - ret = PTR_ERR(iommu->group); - goto err_free_cache; - } - iommu->base = 
devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(iommu->base)) { ret = PTR_ERR(iommu->base); - goto err_free_group; + goto err_free_cache; } irq = platform_get_irq(pdev, 0); if (irq < 0) { ret = irq; - goto err_free_group; + goto err_free_cache; } iommu->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(iommu->clk)) { dev_err(&pdev->dev, "Couldn't get our clock.\n"); ret = PTR_ERR(iommu->clk); - goto err_free_group; + goto err_free_cache; } iommu->reset = devm_reset_control_get(&pdev->dev, NULL); if (IS_ERR(iommu->reset)) { dev_err(&pdev->dev, "Couldn't get our reset line.\n"); ret = PTR_ERR(iommu->reset); - goto err_free_group; + goto err_free_cache; } ret = iommu_device_sysfs_add(&iommu->iommu, &pdev->dev, NULL, dev_name(&pdev->dev)); if (ret) - goto err_free_group; + goto err_free_cache; ret = iommu_device_register(&iommu->iommu, &sun50i_iommu_ops, &pdev->dev); if (ret) @@ -1049,9 +1048,6 @@ err_unregister: err_remove_sysfs: iommu_device_sysfs_remove(&iommu->iommu); -err_free_group: - iommu_group_put(iommu->group); - err_free_cache: kmem_cache_destroy(iommu->pt_pool); diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c deleted file mode 100644 index a482ff838b53..000000000000 --- a/drivers/iommu/tegra-gart.c +++ /dev/null @@ -1,371 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * IOMMU API for Graphics Address Relocation Table on Tegra20 - * - * Copyright (c) 2010-2012, NVIDIA CORPORATION. All rights reserved. - * - * Author: Hiroshi DOYU <hdoyu@nvidia.com> - */ - -#define dev_fmt(fmt) "gart: " fmt - -#include <linux/io.h> -#include <linux/iommu.h> -#include <linux/moduleparam.h> -#include <linux/platform_device.h> -#include <linux/slab.h> -#include <linux/spinlock.h> -#include <linux/vmalloc.h> - -#include <soc/tegra/mc.h> - -#define GART_REG_BASE 0x24 -#define GART_CONFIG (0x24 - GART_REG_BASE) -#define GART_ENTRY_ADDR (0x28 - GART_REG_BASE) -#define GART_ENTRY_DATA (0x2c - GART_REG_BASE) - -#define GART_ENTRY_PHYS_ADDR_VALID BIT(31) - -#define GART_PAGE_SHIFT 12 -#define GART_PAGE_SIZE (1 << GART_PAGE_SHIFT) -#define GART_PAGE_MASK GENMASK(30, GART_PAGE_SHIFT) - -/* bitmap of the page sizes currently supported */ -#define GART_IOMMU_PGSIZES (GART_PAGE_SIZE) - -struct gart_device { - void __iomem *regs; - u32 *savedata; - unsigned long iovmm_base; /* offset to vmm_area start */ - unsigned long iovmm_end; /* offset to vmm_area end */ - spinlock_t pte_lock; /* for pagetable */ - spinlock_t dom_lock; /* for active domain */ - unsigned int active_devices; /* number of active devices */ - struct iommu_domain *active_domain; /* current active domain */ - struct iommu_device iommu; /* IOMMU Core handle */ - struct device *dev; -}; - -static struct gart_device *gart_handle; /* unique for a system */ - -static bool gart_debug; - -/* - * Any interaction between any block on PPSB and a block on APB or AHB - * must have these read-back to ensure the APB/AHB bus transaction is - * complete before initiating activity on the PPSB block. 
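The FLUSH_GART_REGS() macro defined just below is the standard "post the write" idiom: relaxed MMIO writes may still sit in a write buffer, so a readl_relaxed() from the same block forces them to complete before any other bus activity begins. Schematically, with made-up register offsets:

static void dev_write_entry(void __iomem *regs, u32 addr, u32 data)
{
	writel_relaxed(addr, regs + 0x04);	/* hypothetical ADDR register */
	writel_relaxed(data, regs + 0x08);	/* hypothetical DATA register */
	(void)readl_relaxed(regs + 0x00);	/* read-back drains posted writes */
}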
- */ -#define FLUSH_GART_REGS(gart) readl_relaxed((gart)->regs + GART_CONFIG) - -#define for_each_gart_pte(gart, iova) \ - for (iova = gart->iovmm_base; \ - iova < gart->iovmm_end; \ - iova += GART_PAGE_SIZE) - -static inline void gart_set_pte(struct gart_device *gart, - unsigned long iova, unsigned long pte) -{ - writel_relaxed(iova, gart->regs + GART_ENTRY_ADDR); - writel_relaxed(pte, gart->regs + GART_ENTRY_DATA); -} - -static inline unsigned long gart_read_pte(struct gart_device *gart, - unsigned long iova) -{ - unsigned long pte; - - writel_relaxed(iova, gart->regs + GART_ENTRY_ADDR); - pte = readl_relaxed(gart->regs + GART_ENTRY_DATA); - - return pte; -} - -static void do_gart_setup(struct gart_device *gart, const u32 *data) -{ - unsigned long iova; - - for_each_gart_pte(gart, iova) - gart_set_pte(gart, iova, data ? *(data++) : 0); - - writel_relaxed(1, gart->regs + GART_CONFIG); - FLUSH_GART_REGS(gart); -} - -static inline bool gart_iova_range_invalid(struct gart_device *gart, - unsigned long iova, size_t bytes) -{ - return unlikely(iova < gart->iovmm_base || bytes != GART_PAGE_SIZE || - iova + bytes > gart->iovmm_end); -} - -static inline bool gart_pte_valid(struct gart_device *gart, unsigned long iova) -{ - return !!(gart_read_pte(gart, iova) & GART_ENTRY_PHYS_ADDR_VALID); -} - -static int gart_iommu_attach_dev(struct iommu_domain *domain, - struct device *dev) -{ - struct gart_device *gart = gart_handle; - int ret = 0; - - spin_lock(&gart->dom_lock); - - if (gart->active_domain && gart->active_domain != domain) { - ret = -EINVAL; - } else if (dev_iommu_priv_get(dev) != domain) { - dev_iommu_priv_set(dev, domain); - gart->active_domain = domain; - gart->active_devices++; - } - - spin_unlock(&gart->dom_lock); - - return ret; -} - -static void gart_iommu_set_platform_dma(struct device *dev) -{ - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct gart_device *gart = gart_handle; - - spin_lock(&gart->dom_lock); - - if (dev_iommu_priv_get(dev) == domain) { - dev_iommu_priv_set(dev, NULL); - - if (--gart->active_devices == 0) - gart->active_domain = NULL; - } - - spin_unlock(&gart->dom_lock); -} - -static struct iommu_domain *gart_iommu_domain_alloc(unsigned type) -{ - struct iommu_domain *domain; - - if (type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - - domain = kzalloc(sizeof(*domain), GFP_KERNEL); - if (domain) { - domain->geometry.aperture_start = gart_handle->iovmm_base; - domain->geometry.aperture_end = gart_handle->iovmm_end - 1; - domain->geometry.force_aperture = true; - } - - return domain; -} - -static void gart_iommu_domain_free(struct iommu_domain *domain) -{ - WARN_ON(gart_handle->active_domain == domain); - kfree(domain); -} - -static inline int __gart_iommu_map(struct gart_device *gart, unsigned long iova, - unsigned long pa) -{ - if (unlikely(gart_debug && gart_pte_valid(gart, iova))) { - dev_err(gart->dev, "Page entry is in-use\n"); - return -EINVAL; - } - - gart_set_pte(gart, iova, GART_ENTRY_PHYS_ADDR_VALID | pa); - - return 0; -} - -static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t pa, size_t bytes, int prot, gfp_t gfp) -{ - struct gart_device *gart = gart_handle; - int ret; - - if (gart_iova_range_invalid(gart, iova, bytes)) - return -EINVAL; - - spin_lock(&gart->pte_lock); - ret = __gart_iommu_map(gart, iova, (unsigned long)pa); - spin_unlock(&gart->pte_lock); - - return ret; -} - -static inline int __gart_iommu_unmap(struct gart_device *gart, - unsigned long iova) -{ - if (unlikely(gart_debug && 
!gart_pte_valid(gart, iova))) { - dev_err(gart->dev, "Page entry is invalid\n"); - return -EINVAL; - } - - gart_set_pte(gart, iova, 0); - - return 0; -} - -static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t bytes, struct iommu_iotlb_gather *gather) -{ - struct gart_device *gart = gart_handle; - int err; - - if (gart_iova_range_invalid(gart, iova, bytes)) - return 0; - - spin_lock(&gart->pte_lock); - err = __gart_iommu_unmap(gart, iova); - spin_unlock(&gart->pte_lock); - - return err ? 0 : bytes; -} - -static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain, - dma_addr_t iova) -{ - struct gart_device *gart = gart_handle; - unsigned long pte; - - if (gart_iova_range_invalid(gart, iova, GART_PAGE_SIZE)) - return -EINVAL; - - spin_lock(&gart->pte_lock); - pte = gart_read_pte(gart, iova); - spin_unlock(&gart->pte_lock); - - return pte & GART_PAGE_MASK; -} - -static struct iommu_device *gart_iommu_probe_device(struct device *dev) -{ - if (!dev_iommu_fwspec_get(dev)) - return ERR_PTR(-ENODEV); - - return &gart_handle->iommu; -} - -static int gart_iommu_of_xlate(struct device *dev, - struct of_phandle_args *args) -{ - return 0; -} - -static void gart_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, - size_t size) -{ - FLUSH_GART_REGS(gart_handle); -} - -static void gart_iommu_sync(struct iommu_domain *domain, - struct iommu_iotlb_gather *gather) -{ - size_t length = gather->end - gather->start + 1; - - gart_iommu_sync_map(domain, gather->start, length); -} - -static const struct iommu_ops gart_iommu_ops = { - .domain_alloc = gart_iommu_domain_alloc, - .probe_device = gart_iommu_probe_device, - .device_group = generic_device_group, - .set_platform_dma_ops = gart_iommu_set_platform_dma, - .pgsize_bitmap = GART_IOMMU_PGSIZES, - .of_xlate = gart_iommu_of_xlate, - .default_domain_ops = &(const struct iommu_domain_ops) { - .attach_dev = gart_iommu_attach_dev, - .map = gart_iommu_map, - .unmap = gart_iommu_unmap, - .iova_to_phys = gart_iommu_iova_to_phys, - .iotlb_sync_map = gart_iommu_sync_map, - .iotlb_sync = gart_iommu_sync, - .free = gart_iommu_domain_free, - } -}; - -int tegra_gart_suspend(struct gart_device *gart) -{ - u32 *data = gart->savedata; - unsigned long iova; - - /* - * All GART users shall be suspended at this point. Disable - * address translation to trap all GART accesses as invalid - * memory accesses. 
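Several drivers earlier in this section (msm, mtk, sprd, sun50i, s390) also changed .iotlb_sync_map from void to int, so a failed flush or shadow-table update can now propagate to the caller and unwind the mapping instead of being silently ignored. The converted callback shape, sketched with hypothetical helpers:

static int foo_iotlb_sync_map(struct iommu_domain *domain,
			      unsigned long iova, size_t size)
{
	struct foo_domain *dom = to_foo_domain(domain);

	/* return the hardware's verdict instead of swallowing it */
	return foo_flush_range(dom, iova, size);
}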
- */ - writel_relaxed(0, gart->regs + GART_CONFIG); - FLUSH_GART_REGS(gart); - - for_each_gart_pte(gart, iova) - *(data++) = gart_read_pte(gart, iova); - - return 0; -} - -int tegra_gart_resume(struct gart_device *gart) -{ - do_gart_setup(gart, gart->savedata); - - return 0; -} - -struct gart_device *tegra_gart_probe(struct device *dev, struct tegra_mc *mc) -{ - struct gart_device *gart; - struct resource *res; - int err; - - BUILD_BUG_ON(PAGE_SHIFT != GART_PAGE_SHIFT); - - /* the GART memory aperture is required */ - res = platform_get_resource(to_platform_device(dev), IORESOURCE_MEM, 1); - if (!res) { - dev_err(dev, "Memory aperture resource unavailable\n"); - return ERR_PTR(-ENXIO); - } - - gart = kzalloc(sizeof(*gart), GFP_KERNEL); - if (!gart) - return ERR_PTR(-ENOMEM); - - gart_handle = gart; - - gart->dev = dev; - gart->regs = mc->regs + GART_REG_BASE; - gart->iovmm_base = res->start; - gart->iovmm_end = res->end + 1; - spin_lock_init(&gart->pte_lock); - spin_lock_init(&gart->dom_lock); - - do_gart_setup(gart, NULL); - - err = iommu_device_sysfs_add(&gart->iommu, dev, NULL, "gart"); - if (err) - goto free_gart; - - err = iommu_device_register(&gart->iommu, &gart_iommu_ops, dev); - if (err) - goto remove_sysfs; - - gart->savedata = vmalloc(resource_size(res) / GART_PAGE_SIZE * - sizeof(u32)); - if (!gart->savedata) { - err = -ENOMEM; - goto unregister_iommu; - } - - return gart; - -unregister_iommu: - iommu_device_unregister(&gart->iommu); -remove_sysfs: - iommu_device_sysfs_remove(&gart->iommu); -free_gart: - kfree(gart); - - return ERR_PTR(err); -} - -module_param(gart_debug, bool, 0644); -MODULE_PARM_DESC(gart_debug, "Enable GART debugging"); diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index e445f80d0226..310871728ab4 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -272,13 +272,10 @@ static void tegra_smmu_free_asid(struct tegra_smmu *smmu, unsigned int id) clear_bit(id, smmu->asids); } -static struct iommu_domain *tegra_smmu_domain_alloc(unsigned type) +static struct iommu_domain *tegra_smmu_domain_alloc_paging(struct device *dev) { struct tegra_smmu_as *as; - if (type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - as = kzalloc(sizeof(*as), GFP_KERNEL); if (!as) return NULL; @@ -511,23 +508,39 @@ disable: return err; } -static void tegra_smmu_set_platform_dma(struct device *dev) +static int tegra_smmu_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct tegra_smmu_as *as = to_smmu_as(domain); - struct tegra_smmu *smmu = as->smmu; + struct tegra_smmu_as *as; + struct tegra_smmu *smmu; unsigned int index; if (!fwspec) - return; + return -ENODEV; + if (domain == identity_domain || !domain) + return 0; + + as = to_smmu_as(domain); + smmu = as->smmu; for (index = 0; index < fwspec->num_ids; index++) { tegra_smmu_disable(smmu, fwspec->ids[index], as->id); tegra_smmu_as_unprepare(smmu, as); } + return 0; } +static struct iommu_domain_ops tegra_smmu_identity_ops = { + .attach_dev = tegra_smmu_identity_attach, +}; + +static struct iommu_domain tegra_smmu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &tegra_smmu_identity_ops, +}; + static void tegra_smmu_set_pde(struct tegra_smmu_as *as, unsigned long iova, u32 value) { @@ -751,7 +764,8 @@ __tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova, } static int tegra_smmu_map(struct iommu_domain *domain, unsigned long 
iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) + phys_addr_t paddr, size_t size, size_t count, + int prot, gfp_t gfp, size_t *mapped) { struct tegra_smmu_as *as = to_smmu_as(domain); unsigned long flags; @@ -761,11 +775,14 @@ static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova, ret = __tegra_smmu_map(domain, iova, paddr, size, prot, gfp, &flags); spin_unlock_irqrestore(&as->lock, flags); + if (!ret) + *mapped = size; + return ret; } static size_t tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *gather) + size_t size, size_t count, struct iommu_iotlb_gather *gather) { struct tegra_smmu_as *as = to_smmu_as(domain); unsigned long flags; @@ -962,17 +979,28 @@ static int tegra_smmu_of_xlate(struct device *dev, return iommu_fwspec_add_ids(dev, &id, 1); } +static int tegra_smmu_def_domain_type(struct device *dev) +{ + /* + * FIXME: For now we want to run all translation in IDENTITY mode, due + * to some device quirks. Better would be to just quirk the troubled + * devices. + */ + return IOMMU_DOMAIN_IDENTITY; +} + static const struct iommu_ops tegra_smmu_ops = { - .domain_alloc = tegra_smmu_domain_alloc, + .identity_domain = &tegra_smmu_identity_domain, + .def_domain_type = &tegra_smmu_def_domain_type, + .domain_alloc_paging = tegra_smmu_domain_alloc_paging, .probe_device = tegra_smmu_probe_device, .device_group = tegra_smmu_device_group, - .set_platform_dma_ops = tegra_smmu_set_platform_dma, .of_xlate = tegra_smmu_of_xlate, .pgsize_bitmap = SZ_4K, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = tegra_smmu_attach_dev, - .map = tegra_smmu_map, - .unmap = tegra_smmu_unmap, + .map_pages = tegra_smmu_map, + .unmap_pages = tegra_smmu_unmap, .iova_to_phys = tegra_smmu_iova_to_phys, .free = tegra_smmu_domain_free, } @@ -1056,8 +1084,6 @@ DEFINE_SHOW_ATTRIBUTE(tegra_smmu_clients); static void tegra_smmu_debugfs_init(struct tegra_smmu *smmu) { smmu->debugfs = debugfs_create_dir("smmu", NULL); - if (!smmu->debugfs) - return; debugfs_create_file("swgroups", S_IRUGO, smmu->debugfs, smmu, &tegra_smmu_swgroups_fops); diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 17dcd826f5c2..379ebe03efb6 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -85,7 +85,7 @@ struct viommu_request { void *writeback; unsigned int write_offset; unsigned int len; - char buf[]; + char buf[] __counted_by(len); }; #define VIOMMU_FAULT_RESV_MASK 0xffffff00 @@ -230,7 +230,7 @@ static int __viommu_add_req(struct viommu_dev *viommu, void *buf, size_t len, if (write_offset <= 0) return -EINVAL; - req = kzalloc(sizeof(*req) + len, GFP_ATOMIC); + req = kzalloc(struct_size(req, buf, len), GFP_ATOMIC); if (!req) return -ENOMEM; diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 68d11ccee441..98b0329b7154 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -39,8 +39,7 @@ #define FLAGS_WORKAROUND_GICR_WAKER_MSM8996 (1ULL << 0) #define FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539 (1ULL << 1) -#define FLAGS_WORKAROUND_MTK_GICR_SAVE (1ULL << 2) -#define FLAGS_WORKAROUND_ASR_ERRATUM_8601001 (1ULL << 3) +#define FLAGS_WORKAROUND_ASR_ERRATUM_8601001 (1ULL << 2) #define GIC_IRQ_TYPE_PARTITION (GIC_IRQ_TYPE_LPI + 1) @@ -106,7 +105,7 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); * - Figure 4-7 Secure read of the priority field for a Non-secure Group 1 * interrupt. 
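The virtio-iommu hunk above pairs the new __counted_by(len) annotation with struct_size(): the annotation ties the flexible array's bounds to the 'len' member for compiler and FORTIFY/KASAN checking, while struct_size() computes the allocation size with overflow saturation. A minimal usage sketch, not taken from the driver:

#include <linux/overflow.h>
#include <linux/slab.h>

struct msg {
	unsigned int len;
	char buf[] __counted_by(len);
};

static struct msg *msg_alloc(unsigned int len, gfp_t gfp)
{
	/* struct_size() saturates on overflow, so the allocation fails safely */
	struct msg *m = kzalloc(struct_size(m, buf, len), gfp);

	if (m)
		m->len = len;	/* set the counter before touching buf */
	return m;
}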
*/ -DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis); +static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis); DEFINE_STATIC_KEY_FALSE(gic_nonsecure_priorities); EXPORT_SYMBOL(gic_nonsecure_priorities); @@ -1779,15 +1778,6 @@ static bool gic_enable_quirk_msm8996(void *data) return true; } -static bool gic_enable_quirk_mtk_gicr(void *data) -{ - struct gic_chip_data *d = data; - - d->flags |= FLAGS_WORKAROUND_MTK_GICR_SAVE; - - return true; -} - static bool gic_enable_quirk_cavium_38539(void *data) { struct gic_chip_data *d = data; @@ -1889,11 +1879,6 @@ static const struct gic_quirk gic_quirks[] = { .init = gic_enable_quirk_asr8601, }, { - .desc = "GICv3: Mediatek Chromebook GICR save problem", - .property = "mediatek,broken-save-restore-fw", - .init = gic_enable_quirk_mtk_gicr, - }, - { .desc = "GICv3: HIP06 erratum 161010803", .iidr = 0x0204043b, .mask = 0xffffffff, @@ -1959,11 +1944,6 @@ static void gic_enable_nmi_support(void) if (!gic_prio_masking_enabled()) return; - if (gic_data.flags & FLAGS_WORKAROUND_MTK_GICR_SAVE) { - pr_warn("Skipping NMI enable due to firmware issues\n"); - return; - } - rdist_nmi_refs = kcalloc(gic_data.ppi_nr + SGI_NR, sizeof(*rdist_nmi_refs), GFP_KERNEL); if (!rdist_nmi_refs) diff --git a/drivers/leds/rgb/leds-qcom-lpg.c b/drivers/leds/rgb/leds-qcom-lpg.c index bf03abb94e68..68d82a682bf6 100644 --- a/drivers/leds/rgb/leds-qcom-lpg.c +++ b/drivers/leds/rgb/leds-qcom-lpg.c @@ -1085,7 +1085,6 @@ static const struct pwm_ops lpg_pwm_ops = { .request = lpg_pwm_request, .apply = lpg_pwm_apply, .get_state = lpg_pwm_get_state, - .owner = THIS_MODULE, }; static int lpg_add_pwm(struct lpg *lpg) diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c index 67d6e70b4eab..a083921a8968 100644 --- a/drivers/memory/tegra/mc.c +++ b/drivers/memory/tegra/mc.c @@ -979,35 +979,6 @@ static int tegra_mc_probe(struct platform_device *pdev) } } - if (IS_ENABLED(CONFIG_TEGRA_IOMMU_GART) && !mc->soc->smmu) { - mc->gart = tegra_gart_probe(&pdev->dev, mc); - if (IS_ERR(mc->gart)) { - dev_err(&pdev->dev, "failed to probe GART: %ld\n", - PTR_ERR(mc->gart)); - mc->gart = NULL; - } - } - - return 0; -} - -static int __maybe_unused tegra_mc_suspend(struct device *dev) -{ - struct tegra_mc *mc = dev_get_drvdata(dev); - - if (mc->soc->ops && mc->soc->ops->suspend) - return mc->soc->ops->suspend(mc); - - return 0; -} - -static int __maybe_unused tegra_mc_resume(struct device *dev) -{ - struct tegra_mc *mc = dev_get_drvdata(dev); - - if (mc->soc->ops && mc->soc->ops->resume) - return mc->soc->ops->resume(mc); - return 0; } @@ -1020,15 +991,10 @@ static void tegra_mc_sync_state(struct device *dev) icc_sync_state(dev); } -static const struct dev_pm_ops tegra_mc_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(tegra_mc_suspend, tegra_mc_resume) -}; - static struct platform_driver tegra_mc_driver = { .driver = { .name = "tegra-mc", .of_match_table = tegra_mc_of_match, - .pm = &tegra_mc_pm_ops, .suppress_bind_attrs = true, .sync_state = tegra_mc_sync_state, }, diff --git a/drivers/memory/tegra/tegra20.c b/drivers/memory/tegra/tegra20.c index 544bfd216a22..aa4b97d5e732 100644 --- a/drivers/memory/tegra/tegra20.c +++ b/drivers/memory/tegra/tegra20.c @@ -688,32 +688,6 @@ static int tegra20_mc_probe(struct tegra_mc *mc) return 0; } -static int tegra20_mc_suspend(struct tegra_mc *mc) -{ - int err; - - if (IS_ENABLED(CONFIG_TEGRA_IOMMU_GART) && mc->gart) { - err = tegra_gart_suspend(mc->gart); - if (err < 0) - return err; - } - - return 0; -} - -static int tegra20_mc_resume(struct tegra_mc *mc) -{ - int err; - - 
if (IS_ENABLED(CONFIG_TEGRA_IOMMU_GART) && mc->gart) { - err = tegra_gart_resume(mc->gart); - if (err < 0) - return err; - } - - return 0; -} - static irqreturn_t tegra20_mc_handle_irq(int irq, void *data) { struct tegra_mc *mc = data; @@ -789,8 +763,6 @@ static irqreturn_t tegra20_mc_handle_irq(int irq, void *data) static const struct tegra_mc_ops tegra20_mc_ops = { .probe = tegra20_mc_probe, - .suspend = tegra20_mc_suspend, - .resume = tegra20_mc_resume, .handle_irq = tegra20_mc_handle_irq, }; diff --git a/drivers/misc/lkdtm/cfi.c b/drivers/misc/lkdtm/cfi.c index fc28714ae3a6..6a33889d0902 100644 --- a/drivers/misc/lkdtm/cfi.c +++ b/drivers/misc/lkdtm/cfi.c @@ -68,12 +68,20 @@ static void lkdtm_CFI_FORWARD_PROTO(void) #define no_pac_addr(addr) \ ((__force __typeof__(addr))((uintptr_t)(addr) | PAGE_OFFSET)) +#ifdef CONFIG_RISCV +/* https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#frame-pointer-convention */ +#define FRAME_RA_OFFSET (-1) +#else +#define FRAME_RA_OFFSET 1 +#endif + /* The ultimate ROP gadget. */ static noinline __no_ret_protection void set_return_addr_unchecked(unsigned long *expected, unsigned long *addr) { /* Use of volatile is to make sure final write isn't seen as a dead store. */ - unsigned long * volatile *ret_addr = (unsigned long **)__builtin_frame_address(0) + 1; + unsigned long * volatile *ret_addr = + (unsigned long **)__builtin_frame_address(0) + FRAME_RA_OFFSET; /* Make sure we've found the right place on the stack before writing it. */ if (no_pac_addr(*ret_addr) == expected) @@ -88,7 +96,8 @@ static noinline void set_return_addr(unsigned long *expected, unsigned long *addr) { /* Use of volatile is to make sure final write isn't seen as a dead store. */ - unsigned long * volatile *ret_addr = (unsigned long **)__builtin_frame_address(0) + 1; + unsigned long * volatile *ret_addr = + (unsigned long **)__builtin_frame_address(0) + FRAME_RA_OFFSET; /* Make sure we've found the right place on the stack before writing it. 
*/ if (no_pac_addr(*ret_addr) == expected) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 3a8f27c3e310..152dfe593c43 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2381,8 +2381,10 @@ enum mmc_issued mmc_blk_mq_issue_rq(struct mmc_queue *mq, struct request *req) } ret = mmc_blk_cqe_issue_flush(mq, req); break; - case REQ_OP_READ: case REQ_OP_WRITE: + card->written_flag = true; + fallthrough; + case REQ_OP_READ: if (host->cqe_enabled) ret = mmc_blk_cqe_issue_rw_rq(mq, req); else diff --git a/drivers/mmc/core/card.h b/drivers/mmc/core/card.h index 4edf9057fa79..b7754a1b8d97 100644 --- a/drivers/mmc/core/card.h +++ b/drivers/mmc/core/card.h @@ -280,4 +280,8 @@ static inline int mmc_card_broken_sd_cache(const struct mmc_card *c) return c->quirks & MMC_QUIRK_BROKEN_SD_CACHE; } +static inline int mmc_card_broken_cache_flush(const struct mmc_card *c) +{ + return c->quirks & MMC_QUIRK_BROKEN_CACHE_FLUSH; +} #endif diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 8180983bd402..705942edacc6 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -104,7 +104,7 @@ static int mmc_decode_cid(struct mmc_card *card) case 3: /* MMC v3.1 - v3.3 */ case 4: /* MMC v4 */ card->cid.manfid = UNSTUFF_BITS(resp, 120, 8); - card->cid.oemid = UNSTUFF_BITS(resp, 104, 8); + card->cid.oemid = UNSTUFF_BITS(resp, 104, 16); card->cid.prod_name[0] = UNSTUFF_BITS(resp, 96, 8); card->cid.prod_name[1] = UNSTUFF_BITS(resp, 88, 8); card->cid.prod_name[2] = UNSTUFF_BITS(resp, 80, 8); @@ -2086,13 +2086,17 @@ static int _mmc_flush_cache(struct mmc_host *host) { int err = 0; + if (mmc_card_broken_cache_flush(host->card) && !host->card->written_flag) + return 0; + if (_mmc_cache_enabled(host)) { err = mmc_switch(host->card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_FLUSH_CACHE, 1, CACHE_FLUSH_TIMEOUT_MS); if (err) - pr_err("%s: cache flush error %d\n", - mmc_hostname(host), err); + pr_err("%s: cache flush error %d\n", mmc_hostname(host), err); + else + host->card->written_flag = false; } return err; diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h index 32b64b564fb1..cca71867bc4a 100644 --- a/drivers/mmc/core/quirks.h +++ b/drivers/mmc/core/quirks.h @@ -110,11 +110,12 @@ static const struct mmc_fixup __maybe_unused mmc_blk_fixups[] = { MMC_QUIRK_TRIM_BROKEN), /* - * Micron MTFC4GACAJCN-1M advertises TRIM but it does not seems to - * support being used to offload WRITE_ZEROES. + * Micron MTFC4GACAJCN-1M supports TRIM but does not appear to support + * WRITE_ZEROES offloading. It also supports caching, but the cache can + * only be flushed after a write has occurred. 
*/ MMC_FIXUP("Q2J54A", CID_MANFID_MICRON, 0x014e, add_quirk_mmc, - MMC_QUIRK_TRIM_BROKEN), + MMC_QUIRK_TRIM_BROKEN | MMC_QUIRK_BROKEN_CACHE_FLUSH), /* * Kingston EMMC04G-M627 advertises TRIM but it does not seems to diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index d83261e857a5..d8a991b349a8 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -28,6 +28,9 @@ #define PCI_GLI_9750_PM_CTRL 0xFC #define PCI_GLI_9750_PM_STATE GENMASK(1, 0) +#define PCI_GLI_9750_CORRERR_MASK 0x214 +#define PCI_GLI_9750_CORRERR_MASK_REPLAY_TIMER_TIMEOUT BIT(12) + #define SDHCI_GLI_9750_CFG2 0x848 #define SDHCI_GLI_9750_CFG2_L1DLY GENMASK(28, 24) #define GLI_9750_CFG2_L1DLY_VALUE 0x1F @@ -152,6 +155,9 @@ #define PCI_GLI_9755_PM_CTRL 0xFC #define PCI_GLI_9755_PM_STATE GENMASK(1, 0) +#define PCI_GLI_9755_CORRERR_MASK 0x214 +#define PCI_GLI_9755_CORRERR_MASK_REPLAY_TIMER_TIMEOUT BIT(12) + #define SDHCI_GLI_9767_GM_BURST_SIZE 0x510 #define SDHCI_GLI_9767_GM_BURST_SIZE_AXI_ALWAYS_SET BIT(8) @@ -561,6 +567,11 @@ static void gl9750_hw_setting(struct sdhci_host *host) value &= ~PCI_GLI_9750_PM_STATE; pci_write_config_dword(pdev, PCI_GLI_9750_PM_CTRL, value); + /* mask the replay timer timeout of AER */ + pci_read_config_dword(pdev, PCI_GLI_9750_CORRERR_MASK, &value); + value |= PCI_GLI_9750_CORRERR_MASK_REPLAY_TIMER_TIMEOUT; + pci_write_config_dword(pdev, PCI_GLI_9750_CORRERR_MASK, value); + gl9750_wt_off(host); } @@ -770,6 +781,11 @@ static void gl9755_hw_setting(struct sdhci_pci_slot *slot) value &= ~PCI_GLI_9755_PM_STATE; pci_write_config_dword(pdev, PCI_GLI_9755_PM_CTRL, value); + /* mask the replay timer timeout of AER */ + pci_read_config_dword(pdev, PCI_GLI_9755_CORRERR_MASK, &value); + value |= PCI_GLI_9755_CORRERR_MASK_REPLAY_TIMER_TIMEOUT; + pci_write_config_dword(pdev, PCI_GLI_9755_CORRERR_MASK, value); + gl9755_wt_off(pdev); } diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c index c125485ba80e..967bd2dfcda1 100644 --- a/drivers/mmc/host/sdhci_am654.c +++ b/drivers/mmc/host/sdhci_am654.c @@ -598,7 +598,7 @@ static int sdhci_am654_get_otap_delay(struct sdhci_host *host, return 0; } - for (i = MMC_TIMING_MMC_HS; i <= MMC_TIMING_MMC_HS400; i++) { + for (i = MMC_TIMING_LEGACY; i <= MMC_TIMING_MMC_HS400; i++) { ret = device_property_read_u32(dev, td[i].otap_binding, &sdhci_am654->otap_del_sel[i]); diff --git a/drivers/mmc/host/vub300.c b/drivers/mmc/host/vub300.c index de3f443f5fdc..fd67c0682b38 100644 --- a/drivers/mmc/host/vub300.c +++ b/drivers/mmc/host/vub300.c @@ -2309,6 +2309,7 @@ static int vub300_probe(struct usb_interface *interface, vub300->read_only = (0x0010 & vub300->system_port_status.port_flags) ? 
1 : 0; } else { + retval = -EINVAL; goto error5; } usb_set_intfdata(interface, vub300); diff --git a/drivers/mtd/parsers/Kconfig b/drivers/mtd/parsers/Kconfig index 60738edcd5d5..da03ab6efe04 100644 --- a/drivers/mtd/parsers/Kconfig +++ b/drivers/mtd/parsers/Kconfig @@ -1,9 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -config MTD_AR7_PARTS - tristate "TI AR7 partitioning parser" - help - TI AR7 partitioning parser support - config MTD_BCM47XX_PARTS tristate "BCM47XX partitioning parser" depends on BCM47XX || ARCH_BCM_5301X diff --git a/drivers/mtd/parsers/Makefile b/drivers/mtd/parsers/Makefile index 0e70b621a1d8..9b00c62b837a 100644 --- a/drivers/mtd/parsers/Makefile +++ b/drivers/mtd/parsers/Makefile @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_MTD_AR7_PARTS) += ar7part.o obj-$(CONFIG_MTD_BCM47XX_PARTS) += bcm47xxpart.o obj-$(CONFIG_MTD_BCM63XX_PARTS) += bcm63xxpart.o obj-$(CONFIG_MTD_BRCM_U_BOOT) += brcm_u-boot.o diff --git a/drivers/mtd/parsers/ar7part.c b/drivers/mtd/parsers/ar7part.c deleted file mode 100644 index 8cd683711ac6..000000000000 --- a/drivers/mtd/parsers/ar7part.c +++ /dev/null @@ -1,129 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright © 2007 Eugene Konev <ejka@openwrt.org> - * - * TI AR7 flash partition table. - * Based on ar7 map by Felix Fietkau <nbd@openwrt.org> - */ - -#include <linux/kernel.h> -#include <linux/slab.h> - -#include <linux/mtd/mtd.h> -#include <linux/mtd/partitions.h> -#include <linux/memblock.h> -#include <linux/module.h> - -#include <uapi/linux/magic.h> - -#define AR7_PARTS 4 -#define ROOT_OFFSET 0xe0000 - -#define LOADER_MAGIC1 le32_to_cpu(0xfeedfa42) -#define LOADER_MAGIC2 le32_to_cpu(0xfeed1281) - -struct ar7_bin_rec { - unsigned int checksum; - unsigned int length; - unsigned int address; -}; - -static int create_mtd_partitions(struct mtd_info *master, - const struct mtd_partition **pparts, - struct mtd_part_parser_data *data) -{ - struct ar7_bin_rec header; - unsigned int offset; - size_t len; - unsigned int pre_size = master->erasesize, post_size = 0; - unsigned int root_offset = ROOT_OFFSET; - - int retries = 10; - struct mtd_partition *ar7_parts; - - ar7_parts = kcalloc(AR7_PARTS, sizeof(*ar7_parts), GFP_KERNEL); - if (!ar7_parts) - return -ENOMEM; - ar7_parts[0].name = "loader"; - ar7_parts[0].offset = 0; - ar7_parts[0].size = master->erasesize; - ar7_parts[0].mask_flags = MTD_WRITEABLE; - - ar7_parts[1].name = "config"; - ar7_parts[1].offset = 0; - ar7_parts[1].size = master->erasesize; - ar7_parts[1].mask_flags = 0; - - do { /* Try 10 blocks starting from master->erasesize */ - offset = pre_size; - mtd_read(master, offset, sizeof(header), &len, - (uint8_t *)&header); - if (!strncmp((char *)&header, "TIENV0.8", 8)) - ar7_parts[1].offset = pre_size; - if (header.checksum == LOADER_MAGIC1) - break; - if (header.checksum == LOADER_MAGIC2) - break; - pre_size += master->erasesize; - } while (retries--); - - pre_size = offset; - - if (!ar7_parts[1].offset) { - ar7_parts[1].offset = master->size - master->erasesize; - post_size = master->erasesize; - } - - switch (header.checksum) { - case LOADER_MAGIC1: - while (header.length) { - offset += sizeof(header) + header.length; - mtd_read(master, offset, sizeof(header), &len, - (uint8_t *)&header); - } - root_offset = offset + sizeof(header) + 4; - break; - case LOADER_MAGIC2: - while (header.length) { - offset += sizeof(header) + header.length; - mtd_read(master, offset, sizeof(header), &len, - (uint8_t *)&header); - } - root_offset = offset + 
sizeof(header) + 4 + 0xff; - root_offset &= ~(uint32_t)0xff; - break; - default: - printk(KERN_WARNING "Unknown magic: %08x\n", header.checksum); - break; - } - - mtd_read(master, root_offset, sizeof(header), &len, (u8 *)&header); - if (header.checksum != SQUASHFS_MAGIC) { - root_offset += master->erasesize - 1; - root_offset &= ~(master->erasesize - 1); - } - - ar7_parts[2].name = "linux"; - ar7_parts[2].offset = pre_size; - ar7_parts[2].size = master->size - pre_size - post_size; - ar7_parts[2].mask_flags = 0; - - ar7_parts[3].name = "rootfs"; - ar7_parts[3].offset = root_offset; - ar7_parts[3].size = master->size - root_offset - post_size; - ar7_parts[3].mask_flags = 0; - - *pparts = ar7_parts; - return AR7_PARTS; -} - -static struct mtd_part_parser ar7_parser = { - .parse_fn = create_mtd_partitions, - .name = "ar7part", -}; -module_mtd_part_parser(ar7_parser); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR( "Felix Fietkau <nbd@openwrt.org>, " - "Eugene Konev <ejka@openwrt.org>"); -MODULE_DESCRIPTION("MTD partitioning for TI AR7"); diff --git a/drivers/net/dsa/lan9303_mdio.c b/drivers/net/dsa/lan9303_mdio.c index d8ab2b77d201..167a86f39f27 100644 --- a/drivers/net/dsa/lan9303_mdio.c +++ b/drivers/net/dsa/lan9303_mdio.c @@ -32,7 +32,7 @@ static int lan9303_mdio_write(void *ctx, uint32_t reg, uint32_t val) struct lan9303_mdio *sw_dev = (struct lan9303_mdio *)ctx; reg <<= 2; /* reg num to offset */ - mutex_lock(&sw_dev->device->bus->mdio_lock); + mutex_lock_nested(&sw_dev->device->bus->mdio_lock, MDIO_MUTEX_NESTED); lan9303_mdio_real_write(sw_dev->device, reg, val & 0xffff); lan9303_mdio_real_write(sw_dev->device, reg + 2, (val >> 16) & 0xffff); mutex_unlock(&sw_dev->device->bus->mdio_lock); @@ -50,7 +50,7 @@ static int lan9303_mdio_read(void *ctx, uint32_t reg, uint32_t *val) struct lan9303_mdio *sw_dev = (struct lan9303_mdio *)ctx; reg <<= 2; /* reg num to offset */ - mutex_lock(&sw_dev->device->bus->mdio_lock); + mutex_lock_nested(&sw_dev->device->bus->mdio_lock, MDIO_MUTEX_NESTED); *val = lan9303_mdio_real_read(sw_dev->device, reg); *val |= (lan9303_mdio_real_read(sw_dev->device, reg + 2) << 16); mutex_unlock(&sw_dev->device->bus->mdio_lock); diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 5665d0c3668f..1dee27349367 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6647,9 +6647,9 @@ static void tg3_tx(struct tg3_napi *tnapi) tnapi->tx_cons = sw_idx; - /* Need to make the tx_cons update visible to tg3_start_xmit() + /* Need to make the tx_cons update visible to __tg3_start_xmit() * before checking for netif_queue_stopped(). Without the - * memory barrier, there is a small possibility that tg3_start_xmit() + * memory barrier, there is a small possibility that __tg3_start_xmit() * will miss it and cause the queue to be stopped forever. 
*/ smp_mb(); @@ -7889,7 +7889,7 @@ static bool tg3_tso_bug_gso_check(struct tg3_napi *tnapi, struct sk_buff *skb) return skb_shinfo(skb)->gso_segs < tnapi->tx_pending / 3; } -static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *); +static netdev_tx_t __tg3_start_xmit(struct sk_buff *, struct net_device *); /* Use GSO to workaround all TSO packets that meet HW bug conditions * indicated in tg3_tx_frag_set() @@ -7923,7 +7923,7 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi, skb_list_walk_safe(segs, seg, next) { skb_mark_not_on_list(seg); - tg3_start_xmit(seg, tp->dev); + __tg3_start_xmit(seg, tp->dev); } tg3_tso_bug_end: @@ -7933,7 +7933,7 @@ tg3_tso_bug_end: } /* hard_start_xmit for all devices */ -static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t __tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct tg3 *tp = netdev_priv(dev); u32 len, entry, base_flags, mss, vlan = 0; @@ -8182,11 +8182,6 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) netif_tx_wake_queue(txq); } - if (!netdev_xmit_more() || netif_xmit_stopped(txq)) { - /* Packets are ready, update Tx producer idx on card. */ - tw32_tx_mbox(tnapi->prodmbox, entry); - } - return NETDEV_TX_OK; dma_error: @@ -8199,6 +8194,42 @@ drop_nofree: return NETDEV_TX_OK; } +static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct netdev_queue *txq; + u16 skb_queue_mapping; + netdev_tx_t ret; + + skb_queue_mapping = skb_get_queue_mapping(skb); + txq = netdev_get_tx_queue(dev, skb_queue_mapping); + + ret = __tg3_start_xmit(skb, dev); + + /* Notify the hardware that packets are ready by updating the TX ring + * tail pointer. We respect netdev_xmit_more() thus avoiding poking + * the hardware for every packet. To guarantee forward progress the TX + * ring must be drained when it is full as indicated by + * netif_xmit_stopped(). This needs to happen even when the current + * skb was dropped or rejected with NETDEV_TX_BUSY. Otherwise packets + * queued by previous __tg3_start_xmit() calls might get stuck in + * the queue forever. + */ + if (!netdev_xmit_more() || netif_xmit_stopped(txq)) { + struct tg3_napi *tnapi; + struct tg3 *tp; + + tp = netdev_priv(dev); + tnapi = &tp->napi[skb_queue_mapping]; + + if (tg3_flag(tp, ENABLE_TSS)) + tnapi++; + + tw32_tx_mbox(tnapi->prodmbox, tnapi->tx_prod); + } + + return ret; +} + static void tg3_mac_loopback(struct tg3 *tp, bool enable) { if (enable) { @@ -17729,7 +17760,7 @@ static int tg3_init_one(struct pci_dev *pdev, * device behind the EPB cannot support DMA addresses > 40-bit. * On 64-bit systems with IOMMU, use 40-bit dma_mask. * On 64-bit systems without IOMMU, use 64-bit dma_mask and - * do DMA address check in tg3_start_xmit(). + * do DMA address check in __tg3_start_xmit(). 
*/ if (tg3_flag(tp, IS_5788)) persist_dma_mask = dma_mask = DMA_BIT_MASK(32); @@ -18127,7 +18158,8 @@ static void tg3_shutdown(struct pci_dev *pdev) if (netif_running(dev)) dev_close(dev); - tg3_power_down(tp); + if (system_state == SYSTEM_POWER_OFF) + tg3_power_down(tp); rtnl_unlock(); diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 30bec47bc665..cffbf27c4656 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -2769,7 +2769,7 @@ static int enetc_setup_xdp_prog(struct net_device *ndev, struct bpf_prog *prog, if (priv->min_num_stack_tx_queues + num_xdp_tx_queues > priv->num_tx_rings) { NL_SET_ERR_MSG_FMT_MOD(extack, - "Reserving %d XDP TXQs does not leave a minimum of %d TXQs for network stack (total %d available)", + "Reserving %d XDP TXQs does not leave a minimum of %d for stack (total %d)", num_xdp_tx_queues, priv->min_num_stack_tx_queues, priv->num_tx_rings); diff --git a/drivers/net/ethernet/intel/i40e/i40e_devlink.c b/drivers/net/ethernet/intel/i40e/i40e_devlink.c index 74bc111b4849..cc4e9e2addb7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_devlink.c +++ b/drivers/net/ethernet/intel/i40e/i40e_devlink.c @@ -231,6 +231,5 @@ int i40e_devlink_create_port(struct i40e_pf *pf) **/ void i40e_devlink_destroy_port(struct i40e_pf *pf) { - devlink_port_type_clear(&pf->devlink_port); devlink_port_unregister(&pf->devlink_port); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 3157d14d9b12..f7a332e51524 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -14213,8 +14213,7 @@ int i40e_vsi_release(struct i40e_vsi *vsi) } set_bit(__I40E_VSI_RELEASING, vsi->state); uplink_seid = vsi->uplink_seid; - if (vsi->type == I40E_VSI_MAIN) - i40e_devlink_destroy_port(pf); + if (vsi->type != I40E_VSI_SRIOV) { if (vsi->netdev_registered) { vsi->netdev_registered = false; @@ -14228,6 +14227,9 @@ int i40e_vsi_release(struct i40e_vsi *vsi) i40e_vsi_disable_irq(vsi); } + if (vsi->type == I40E_VSI_MAIN) + i40e_devlink_destroy_port(pf); + spin_lock_bh(&vsi->mac_filter_hash_lock); /* clear the sync flag on all filters */ @@ -14402,14 +14404,14 @@ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi) err_rings: i40e_vsi_free_q_vectors(vsi); - if (vsi->type == I40E_VSI_MAIN) - i40e_devlink_destroy_port(pf); if (vsi->netdev_registered) { vsi->netdev_registered = false; unregister_netdev(vsi->netdev); free_netdev(vsi->netdev); vsi->netdev = NULL; } + if (vsi->type == I40E_VSI_MAIN) + i40e_devlink_destroy_port(pf); i40e_aq_delete_element(&pf->hw, vsi->seid, NULL); err_vsi: i40e_vsi_clear(vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index b980f89dc892..cd065ec48c87 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -628,7 +628,7 @@ void ice_lag_move_new_vf_nodes(struct ice_vf *vf) INIT_LIST_HEAD(&ndlist.node); rcu_read_lock(); for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) { - nl = kzalloc(sizeof(*nl), GFP_KERNEL); + nl = kzalloc(sizeof(*nl), GFP_ATOMIC); if (!nl) break; @@ -1555,18 +1555,12 @@ static void ice_lag_chk_disabled_bond(struct ice_lag *lag, void *ptr) */ static void ice_lag_disable_sriov_bond(struct ice_lag *lag) { - struct ice_lag_netdev_list *entry; struct ice_netdev_priv *np; - struct net_device *netdev; struct ice_pf *pf; - 
list_for_each_entry(entry, lag->netdev_head, node) { - netdev = entry->netdev; - np = netdev_priv(netdev); - pf = np->vsi->back; - - ice_clear_feature_support(pf, ICE_F_SRIOV_LAG); - } + np = netdev_priv(lag->netdev); + pf = np->vsi->back; + ice_clear_feature_support(pf, ICE_F_SRIOV_LAG); } /** @@ -1698,7 +1692,7 @@ ice_lag_event_handler(struct notifier_block *notif_blk, unsigned long event, rcu_read_lock(); for_each_netdev_in_bond_rcu(upper_netdev, tmp_nd) { - nd_list = kzalloc(sizeof(*nd_list), GFP_KERNEL); + nd_list = kzalloc(sizeof(*nd_list), GFP_ATOMIC); if (!nd_list) break; @@ -2075,7 +2069,7 @@ void ice_lag_rebuild(struct ice_pf *pf) INIT_LIST_HEAD(&ndlist.node); rcu_read_lock(); for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) { - nl = kzalloc(sizeof(*nl), GFP_KERNEL); + nl = kzalloc(sizeof(*nl), GFP_ATOMIC); if (!nl) break; diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index 37b54db91df2..dd03cb69ad26 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -630,32 +630,83 @@ bool ice_is_tunnel_supported(struct net_device *dev) return ice_tc_tun_get_type(dev) != TNL_LAST; } -static int -ice_eswitch_tc_parse_action(struct ice_tc_flower_fltr *fltr, - struct flow_action_entry *act) +static bool ice_tc_is_dev_uplink(struct net_device *dev) +{ + return netif_is_ice(dev) || ice_is_tunnel_supported(dev); +} + +static int ice_tc_setup_redirect_action(struct net_device *filter_dev, + struct ice_tc_flower_fltr *fltr, + struct net_device *target_dev) { struct ice_repr *repr; + fltr->action.fltr_act = ICE_FWD_TO_VSI; + + if (ice_is_port_repr_netdev(filter_dev) && + ice_is_port_repr_netdev(target_dev)) { + repr = ice_netdev_to_repr(target_dev); + + fltr->dest_vsi = repr->src_vsi; + fltr->direction = ICE_ESWITCH_FLTR_EGRESS; + } else if (ice_is_port_repr_netdev(filter_dev) && + ice_tc_is_dev_uplink(target_dev)) { + repr = ice_netdev_to_repr(filter_dev); + + fltr->dest_vsi = repr->src_vsi->back->switchdev.uplink_vsi; + fltr->direction = ICE_ESWITCH_FLTR_EGRESS; + } else if (ice_tc_is_dev_uplink(filter_dev) && + ice_is_port_repr_netdev(target_dev)) { + repr = ice_netdev_to_repr(target_dev); + + fltr->dest_vsi = repr->src_vsi; + fltr->direction = ICE_ESWITCH_FLTR_INGRESS; + } else { + NL_SET_ERR_MSG_MOD(fltr->extack, + "Unsupported netdevice in switchdev mode"); + return -EINVAL; + } + + return 0; +} + +static int +ice_tc_setup_drop_action(struct net_device *filter_dev, + struct ice_tc_flower_fltr *fltr) +{ + fltr->action.fltr_act = ICE_DROP_PACKET; + + if (ice_is_port_repr_netdev(filter_dev)) { + fltr->direction = ICE_ESWITCH_FLTR_EGRESS; + } else if (ice_tc_is_dev_uplink(filter_dev)) { + fltr->direction = ICE_ESWITCH_FLTR_INGRESS; + } else { + NL_SET_ERR_MSG_MOD(fltr->extack, + "Unsupported netdevice in switchdev mode"); + return -EINVAL; + } + + return 0; +} + +static int ice_eswitch_tc_parse_action(struct net_device *filter_dev, + struct ice_tc_flower_fltr *fltr, + struct flow_action_entry *act) +{ + int err; + switch (act->id) { case FLOW_ACTION_DROP: - fltr->action.fltr_act = ICE_DROP_PACKET; + err = ice_tc_setup_drop_action(filter_dev, fltr); + if (err) + return err; + break; case FLOW_ACTION_REDIRECT: - fltr->action.fltr_act = ICE_FWD_TO_VSI; - - if (ice_is_port_repr_netdev(act->dev)) { - repr = ice_netdev_to_repr(act->dev); - - fltr->dest_vsi = repr->src_vsi; - fltr->direction = ICE_ESWITCH_FLTR_INGRESS; - } else if (netif_is_ice(act->dev) || - 
ice_is_tunnel_supported(act->dev)) { - fltr->direction = ICE_ESWITCH_FLTR_EGRESS; - } else { - NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported netdevice in switchdev mode"); - return -EINVAL; - } + err = ice_tc_setup_redirect_action(filter_dev, fltr, act->dev); + if (err) + return err; break; @@ -696,10 +747,6 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) goto exit; } - /* egress traffic is always redirect to uplink */ - if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS) - fltr->dest_vsi = vsi->back->switchdev.uplink_vsi; - rule_info.sw_act.fltr_act = fltr->action.fltr_act; if (fltr->action.fltr_act != ICE_DROP_PACKET) rule_info.sw_act.vsi_handle = fltr->dest_vsi->idx; @@ -713,13 +760,21 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) rule_info.flags_info.act_valid = true; if (fltr->direction == ICE_ESWITCH_FLTR_INGRESS) { + /* Uplink to VF */ rule_info.sw_act.flag |= ICE_FLTR_RX; rule_info.sw_act.src = hw->pf_id; rule_info.flags_info.act = ICE_SINGLE_ACT_LB_ENABLE; - } else { + } else if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS && + fltr->dest_vsi == vsi->back->switchdev.uplink_vsi) { + /* VF to Uplink */ rule_info.sw_act.flag |= ICE_FLTR_TX; rule_info.sw_act.src = vsi->idx; rule_info.flags_info.act = ICE_SINGLE_ACT_LAN_ENABLE; + } else { + /* VF to VF */ + rule_info.sw_act.flag |= ICE_FLTR_TX; + rule_info.sw_act.src = vsi->idx; + rule_info.flags_info.act = ICE_SINGLE_ACT_LB_ENABLE; } /* specify the cookie as filter_rule_id */ @@ -1745,16 +1800,17 @@ ice_tc_parse_action(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr, /** * ice_parse_tc_flower_actions - Parse the actions for a TC filter + * @filter_dev: Pointer to device on which filter is being added * @vsi: Pointer to VSI * @cls_flower: Pointer to TC flower offload structure * @fltr: Pointer to TC flower filter structure * * Parse the actions for a TC filter */ -static int -ice_parse_tc_flower_actions(struct ice_vsi *vsi, - struct flow_cls_offload *cls_flower, - struct ice_tc_flower_fltr *fltr) +static int ice_parse_tc_flower_actions(struct net_device *filter_dev, + struct ice_vsi *vsi, + struct flow_cls_offload *cls_flower, + struct ice_tc_flower_fltr *fltr) { struct flow_rule *rule = flow_cls_offload_flow_rule(cls_flower); struct flow_action *flow_action = &rule->action; @@ -1769,7 +1825,7 @@ ice_parse_tc_flower_actions(struct ice_vsi *vsi, flow_action_for_each(i, act, flow_action) { if (ice_is_eswitch_mode_switchdev(vsi->back)) - err = ice_eswitch_tc_parse_action(fltr, act); + err = ice_eswitch_tc_parse_action(filter_dev, fltr, act); else err = ice_tc_parse_action(vsi, fltr, act); if (err) @@ -1856,7 +1912,7 @@ ice_add_tc_fltr(struct net_device *netdev, struct ice_vsi *vsi, if (err < 0) goto err; - err = ice_parse_tc_flower_actions(vsi, f, fltr); + err = ice_parse_tc_flower_actions(netdev, vsi, f, fltr); if (err < 0) goto err; diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 5e1ef70d54fe..1f728a9004d9 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -2365,7 +2365,7 @@ static void idpf_tx_splitq_map(struct idpf_queue *tx_q, */ int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off) { - const struct skb_shared_info *shinfo = skb_shinfo(skb); + const struct skb_shared_info *shinfo; union { struct iphdr *v4; struct ipv6hdr *v6; @@ -2379,13 +2379,15 @@ int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off) u32 paylen, 
l4_start; int err; - if (!shinfo->gso_size) + if (!skb_is_gso(skb)) return 0; err = skb_cow_head(skb, 0); if (err < 0) return err; + shinfo = skb_shinfo(skb); + ip.hdr = skb_network_header(skb); l4.hdr = skb_transport_header(skb); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 1a42bfded872..7ca6941ea0b9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -818,7 +818,6 @@ void otx2_sqb_flush(struct otx2_nic *pfvf) int qidx, sqe_tail, sqe_head; struct otx2_snd_queue *sq; u64 incr, *ptr, val; - int timeout = 1000; ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS); for (qidx = 0; qidx < otx2_get_total_tx_queues(pfvf); qidx++) { @@ -827,15 +826,11 @@ void otx2_sqb_flush(struct otx2_nic *pfvf) continue; incr = (u64)qidx << 32; - while (timeout) { - val = otx2_atomic64_add(incr, ptr); - sqe_head = (val >> 20) & 0x3F; - sqe_tail = (val >> 28) & 0x3F; - if (sqe_head == sqe_tail) - break; - usleep_range(1, 3); - timeout--; - } + val = otx2_atomic64_add(incr, ptr); + sqe_head = (val >> 20) & 0x3F; + sqe_tail = (val >> 28) & 0x3F; + if (sqe_head != sqe_tail) + usleep_range(50, 60); } } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index c04a8ee53a82..e7c69b57147e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -977,6 +977,7 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl, int prio, bool pfc_en); int otx2_txsch_alloc(struct otx2_nic *pfvf); void otx2_txschq_stop(struct otx2_nic *pfvf); void otx2_txschq_free_one(struct otx2_nic *pfvf, u16 lvl, u16 schq); +void otx2_free_pending_sqe(struct otx2_nic *pfvf); void otx2_sqb_flush(struct otx2_nic *pfvf); int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, dma_addr_t *dma); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 6daf4d58c25d..91b99fd70361 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1193,31 +1193,32 @@ static char *nix_mnqerr_e_str[NIX_MNQERR_MAX] = { }; static char *nix_snd_status_e_str[NIX_SND_STATUS_MAX] = { - "NIX_SND_STATUS_GOOD", - "NIX_SND_STATUS_SQ_CTX_FAULT", - "NIX_SND_STATUS_SQ_CTX_POISON", - "NIX_SND_STATUS_SQB_FAULT", - "NIX_SND_STATUS_SQB_POISON", - "NIX_SND_STATUS_HDR_ERR", - "NIX_SND_STATUS_EXT_ERR", - "NIX_SND_STATUS_JUMP_FAULT", - "NIX_SND_STATUS_JUMP_POISON", - "NIX_SND_STATUS_CRC_ERR", - "NIX_SND_STATUS_IMM_ERR", - "NIX_SND_STATUS_SG_ERR", - "NIX_SND_STATUS_MEM_ERR", - "NIX_SND_STATUS_INVALID_SUBDC", - "NIX_SND_STATUS_SUBDC_ORDER_ERR", - "NIX_SND_STATUS_DATA_FAULT", - "NIX_SND_STATUS_DATA_POISON", - "NIX_SND_STATUS_NPC_DROP_ACTION", - "NIX_SND_STATUS_LOCK_VIOL", - "NIX_SND_STATUS_NPC_UCAST_CHAN_ERR", - "NIX_SND_STATUS_NPC_MCAST_CHAN_ERR", - "NIX_SND_STATUS_NPC_MCAST_ABORT", - "NIX_SND_STATUS_NPC_VTAG_PTR_ERR", - "NIX_SND_STATUS_NPC_VTAG_SIZE_ERR", - "NIX_SND_STATUS_SEND_STATS_ERR", + [NIX_SND_STATUS_GOOD] = "NIX_SND_STATUS_GOOD", + [NIX_SND_STATUS_SQ_CTX_FAULT] = "NIX_SND_STATUS_SQ_CTX_FAULT", + [NIX_SND_STATUS_SQ_CTX_POISON] = "NIX_SND_STATUS_SQ_CTX_POISON", + [NIX_SND_STATUS_SQB_FAULT] = "NIX_SND_STATUS_SQB_FAULT", + [NIX_SND_STATUS_SQB_POISON] = "NIX_SND_STATUS_SQB_POISON", + [NIX_SND_STATUS_HDR_ERR] = 
"NIX_SND_STATUS_HDR_ERR", + [NIX_SND_STATUS_EXT_ERR] = "NIX_SND_STATUS_EXT_ERR", + [NIX_SND_STATUS_JUMP_FAULT] = "NIX_SND_STATUS_JUMP_FAULT", + [NIX_SND_STATUS_JUMP_POISON] = "NIX_SND_STATUS_JUMP_POISON", + [NIX_SND_STATUS_CRC_ERR] = "NIX_SND_STATUS_CRC_ERR", + [NIX_SND_STATUS_IMM_ERR] = "NIX_SND_STATUS_IMM_ERR", + [NIX_SND_STATUS_SG_ERR] = "NIX_SND_STATUS_SG_ERR", + [NIX_SND_STATUS_MEM_ERR] = "NIX_SND_STATUS_MEM_ERR", + [NIX_SND_STATUS_INVALID_SUBDC] = "NIX_SND_STATUS_INVALID_SUBDC", + [NIX_SND_STATUS_SUBDC_ORDER_ERR] = "NIX_SND_STATUS_SUBDC_ORDER_ERR", + [NIX_SND_STATUS_DATA_FAULT] = "NIX_SND_STATUS_DATA_FAULT", + [NIX_SND_STATUS_DATA_POISON] = "NIX_SND_STATUS_DATA_POISON", + [NIX_SND_STATUS_NPC_DROP_ACTION] = "NIX_SND_STATUS_NPC_DROP_ACTION", + [NIX_SND_STATUS_LOCK_VIOL] = "NIX_SND_STATUS_LOCK_VIOL", + [NIX_SND_STATUS_NPC_UCAST_CHAN_ERR] = "NIX_SND_STAT_NPC_UCAST_CHAN_ERR", + [NIX_SND_STATUS_NPC_MCAST_CHAN_ERR] = "NIX_SND_STAT_NPC_MCAST_CHAN_ERR", + [NIX_SND_STATUS_NPC_MCAST_ABORT] = "NIX_SND_STATUS_NPC_MCAST_ABORT", + [NIX_SND_STATUS_NPC_VTAG_PTR_ERR] = "NIX_SND_STATUS_NPC_VTAG_PTR_ERR", + [NIX_SND_STATUS_NPC_VTAG_SIZE_ERR] = "NIX_SND_STATUS_NPC_VTAG_SIZE_ERR", + [NIX_SND_STATUS_SEND_MEM_FAULT] = "NIX_SND_STATUS_SEND_MEM_FAULT", + [NIX_SND_STATUS_SEND_STATS_ERR] = "NIX_SND_STATUS_SEND_STATS_ERR", }; static irqreturn_t otx2_q_intr_handler(int irq, void *data) @@ -1238,14 +1239,16 @@ static irqreturn_t otx2_q_intr_handler(int irq, void *data) continue; if (val & BIT_ULL(42)) { - netdev_err(pf->netdev, "CQ%lld: error reading NIX_LF_CQ_OP_INT, NIX_LF_ERR_INT 0x%llx\n", + netdev_err(pf->netdev, + "CQ%lld: error reading NIX_LF_CQ_OP_INT, NIX_LF_ERR_INT 0x%llx\n", qidx, otx2_read64(pf, NIX_LF_ERR_INT)); } else { if (val & BIT_ULL(NIX_CQERRINT_DOOR_ERR)) netdev_err(pf->netdev, "CQ%lld: Doorbell error", qidx); if (val & BIT_ULL(NIX_CQERRINT_CQE_FAULT)) - netdev_err(pf->netdev, "CQ%lld: Memory fault on CQE write to LLC/DRAM", + netdev_err(pf->netdev, + "CQ%lld: Memory fault on CQE write to LLC/DRAM", qidx); } @@ -1272,7 +1275,8 @@ static irqreturn_t otx2_q_intr_handler(int irq, void *data) (val & NIX_SQINT_BITS)); if (val & BIT_ULL(42)) { - netdev_err(pf->netdev, "SQ%lld: error reading NIX_LF_SQ_OP_INT, NIX_LF_ERR_INT 0x%llx\n", + netdev_err(pf->netdev, + "SQ%lld: error reading NIX_LF_SQ_OP_INT, NIX_LF_ERR_INT 0x%llx\n", qidx, otx2_read64(pf, NIX_LF_ERR_INT)); goto done; } @@ -1282,8 +1286,11 @@ static irqreturn_t otx2_q_intr_handler(int irq, void *data) goto chk_mnq_err_dbg; sq_op_err_code = FIELD_GET(GENMASK(7, 0), sq_op_err_dbg); - netdev_err(pf->netdev, "SQ%lld: NIX_LF_SQ_OP_ERR_DBG(%llx) err=%s\n", - qidx, sq_op_err_dbg, nix_sqoperr_e_str[sq_op_err_code]); + netdev_err(pf->netdev, + "SQ%lld: NIX_LF_SQ_OP_ERR_DBG(0x%llx) err=%s(%#x)\n", + qidx, sq_op_err_dbg, + nix_sqoperr_e_str[sq_op_err_code], + sq_op_err_code); otx2_write64(pf, NIX_LF_SQ_OP_ERR_DBG, BIT_ULL(44)); @@ -1300,16 +1307,21 @@ chk_mnq_err_dbg: goto chk_snd_err_dbg; mnq_err_code = FIELD_GET(GENMASK(7, 0), mnq_err_dbg); - netdev_err(pf->netdev, "SQ%lld: NIX_LF_MNQ_ERR_DBG(%llx) err=%s\n", - qidx, mnq_err_dbg, nix_mnqerr_e_str[mnq_err_code]); + netdev_err(pf->netdev, + "SQ%lld: NIX_LF_MNQ_ERR_DBG(0x%llx) err=%s(%#x)\n", + qidx, mnq_err_dbg, nix_mnqerr_e_str[mnq_err_code], + mnq_err_code); otx2_write64(pf, NIX_LF_MNQ_ERR_DBG, BIT_ULL(44)); chk_snd_err_dbg: snd_err_dbg = otx2_read64(pf, NIX_LF_SEND_ERR_DBG); if (snd_err_dbg & BIT(44)) { snd_err_code = FIELD_GET(GENMASK(7, 0), snd_err_dbg); - netdev_err(pf->netdev, "SQ%lld: 
NIX_LF_SND_ERR_DBG:0x%llx err=%s\n", - qidx, snd_err_dbg, nix_snd_status_e_str[snd_err_code]); + netdev_err(pf->netdev, + "SQ%lld: NIX_LF_SND_ERR_DBG:0x%llx err=%s(%#x)\n", + qidx, snd_err_dbg, + nix_snd_status_e_str[snd_err_code], + snd_err_code); otx2_write64(pf, NIX_LF_SEND_ERR_DBG, BIT_ULL(44)); } @@ -1589,6 +1601,7 @@ static void otx2_free_hw_resources(struct otx2_nic *pf) else otx2_cleanup_tx_cqes(pf, cq); } + otx2_free_pending_sqe(pf); otx2_free_sq_res(pf); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h index fa37b9f312ca..4e5899d8fa2e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h @@ -318,23 +318,23 @@ enum nix_snd_status_e { NIX_SND_STATUS_EXT_ERR = 0x6, NIX_SND_STATUS_JUMP_FAULT = 0x7, NIX_SND_STATUS_JUMP_POISON = 0x8, - NIX_SND_STATUS_CRC_ERR = 0x9, - NIX_SND_STATUS_IMM_ERR = 0x10, - NIX_SND_STATUS_SG_ERR = 0x11, - NIX_SND_STATUS_MEM_ERR = 0x12, - NIX_SND_STATUS_INVALID_SUBDC = 0x13, - NIX_SND_STATUS_SUBDC_ORDER_ERR = 0x14, - NIX_SND_STATUS_DATA_FAULT = 0x15, - NIX_SND_STATUS_DATA_POISON = 0x16, - NIX_SND_STATUS_NPC_DROP_ACTION = 0x17, - NIX_SND_STATUS_LOCK_VIOL = 0x18, - NIX_SND_STATUS_NPC_UCAST_CHAN_ERR = 0x19, - NIX_SND_STATUS_NPC_MCAST_CHAN_ERR = 0x20, - NIX_SND_STATUS_NPC_MCAST_ABORT = 0x21, - NIX_SND_STATUS_NPC_VTAG_PTR_ERR = 0x22, - NIX_SND_STATUS_NPC_VTAG_SIZE_ERR = 0x23, - NIX_SND_STATUS_SEND_MEM_FAULT = 0x24, - NIX_SND_STATUS_SEND_STATS_ERR = 0x25, + NIX_SND_STATUS_CRC_ERR = 0x10, + NIX_SND_STATUS_IMM_ERR = 0x11, + NIX_SND_STATUS_SG_ERR = 0x12, + NIX_SND_STATUS_MEM_ERR = 0x13, + NIX_SND_STATUS_INVALID_SUBDC = 0x14, + NIX_SND_STATUS_SUBDC_ORDER_ERR = 0x15, + NIX_SND_STATUS_DATA_FAULT = 0x16, + NIX_SND_STATUS_DATA_POISON = 0x17, + NIX_SND_STATUS_NPC_DROP_ACTION = 0x20, + NIX_SND_STATUS_LOCK_VIOL = 0x21, + NIX_SND_STATUS_NPC_UCAST_CHAN_ERR = 0x22, + NIX_SND_STATUS_NPC_MCAST_CHAN_ERR = 0x23, + NIX_SND_STATUS_NPC_MCAST_ABORT = 0x24, + NIX_SND_STATUS_NPC_VTAG_PTR_ERR = 0x25, + NIX_SND_STATUS_NPC_VTAG_SIZE_ERR = 0x26, + NIX_SND_STATUS_SEND_MEM_FAULT = 0x27, + NIX_SND_STATUS_SEND_STATS_ERR = 0x28, NIX_SND_STATUS_MAX, }; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 53b2a4ef5298..6ee15f3c25ed 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -1247,9 +1247,11 @@ void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, int q void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) { + int tx_pkts = 0, tx_bytes = 0; struct sk_buff *skb = NULL; struct otx2_snd_queue *sq; struct nix_cqe_tx_s *cqe; + struct netdev_queue *txq; int processed_cqe = 0; struct sg_list *sg; int qidx; @@ -1270,12 +1272,20 @@ void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) sg = &sq->sg[cqe->comp.sqe_id]; skb = (struct sk_buff *)sg->skb; if (skb) { + tx_bytes += skb->len; + tx_pkts++; otx2_dma_unmap_skb_frags(pfvf, sg); dev_kfree_skb_any(skb); sg->skb = (u64)NULL; } } + if (likely(tx_pkts)) { + if (qidx >= pfvf->hw.tx_queues) + qidx -= pfvf->hw.xdp_queues; + txq = netdev_get_tx_queue(pfvf->netdev, qidx); + netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); + } /* Free CQEs to HW */ otx2_write64(pfvf, NIX_LF_CQ_OP_DOOR, ((u64)cq->cq_idx << 32) | processed_cqe); @@ -1302,6 +1312,38 @@ int otx2_rxtx_enable(struct otx2_nic 
*pfvf, bool enable) return err; } +void otx2_free_pending_sqe(struct otx2_nic *pfvf) +{ + int tx_pkts = 0, tx_bytes = 0; + struct sk_buff *skb = NULL; + struct otx2_snd_queue *sq; + struct netdev_queue *txq; + struct sg_list *sg; + int sq_idx, sqe; + + for (sq_idx = 0; sq_idx < pfvf->hw.tx_queues; sq_idx++) { + sq = &pfvf->qset.sq[sq_idx]; + for (sqe = 0; sqe < sq->sqe_cnt; sqe++) { + sg = &sq->sg[sqe]; + skb = (struct sk_buff *)sg->skb; + if (skb) { + tx_bytes += skb->len; + tx_pkts++; + otx2_dma_unmap_skb_frags(pfvf, sg); + dev_kfree_skb_any(skb); + sg->skb = (u64)NULL; + } + } + + if (!tx_pkts) + continue; + txq = netdev_get_tx_queue(pfvf->netdev, sq_idx); + netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); + tx_pkts = 0; + tx_bytes = 0; + } +} + static void otx2_xdp_sqe_add_sg(struct otx2_snd_queue *sq, u64 dma_addr, int len, int *offset) { diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index a987defb575c..0c76c162b8a9 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2582,9 +2582,13 @@ static void rtl_set_rx_mode(struct net_device *dev) if (dev->flags & IFF_PROMISC) { rx_mode |= AcceptAllPhys; + } else if (!(dev->flags & IFF_MULTICAST)) { + rx_mode &= ~AcceptMulticast; } else if (netdev_mc_count(dev) > MC_FILTER_LIMIT || dev->flags & IFF_ALLMULTI || - tp->mac_version == RTL_GIGA_MAC_VER_35) { + tp->mac_version == RTL_GIGA_MAC_VER_35 || + tp->mac_version == RTL_GIGA_MAC_VER_46 || + tp->mac_version == RTL_GIGA_MAC_VER_48) { /* accept all multicasts */ } else if (netdev_mc_empty(dev)) { rx_mode &= ~AcceptMulticast; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 7a8f47e7b728..a4e8b498dea9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -259,7 +259,7 @@ ((val) << XGMAC_PPS_MINIDX(x)) #define XGMAC_PPSCMD_START 0x2 #define XGMAC_PPSCMD_STOP 0x5 -#define XGMAC_PPSEN0 BIT(4) +#define XGMAC_PPSENx(x) BIT(4 + (x) * 8) #define XGMAC_PPSx_TARGET_TIME_SEC(x) (0x00000d80 + (x) * 0x10) #define XGMAC_PPSx_TARGET_TIME_NSEC(x) (0x00000d84 + (x) * 0x10) #define XGMAC_TRGTBUSY0 BIT(31) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index f352be269deb..453e88b75be0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -1178,7 +1178,19 @@ static int dwxgmac2_flex_pps_config(void __iomem *ioaddr, int index, val |= XGMAC_PPSCMDx(index, XGMAC_PPSCMD_START); val |= XGMAC_TRGTMODSELx(index, XGMAC_PPSCMD_START); - val |= XGMAC_PPSEN0; + + /* The XGMAC core has at most 4 PPS outputs. + * + * Prior to XGMAC Core 3.20, Fixed mode or Flexible mode is selectable + * for PPS0 only, via PPSEN0. PPS{1,2,3} are in Flexible mode by + * default and cannot be switched to Fixed mode, since PPSEN{1,2,3} are + * read-only, reserved to 0. Always setting PPSEN{1,2,3} anyway does + * not make things worse ;-) + * + * From XGMAC Core 3.20 onward, PPSEN{0,1,2,3} are writable and must + * be set, or the PPS outputs stay in Fixed PPS mode by default.
+ */ + val |= XGMAC_PPSENx(index); writel(cfg->start.tv_sec, ioaddr + XGMAC_PPSx_TARGET_TIME_SEC(index)); diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 24120605502f..ece9f8df98ae 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1588,10 +1588,10 @@ static void am65_cpsw_nuss_mac_link_up(struct phylink_config *config, struct phy /* rx_pause/tx_pause */ if (rx_pause) - mac_control |= CPSW_SL_CTL_RX_FLOW_EN; + mac_control |= CPSW_SL_CTL_TX_FLOW_EN; if (tx_pause) - mac_control |= CPSW_SL_CTL_TX_FLOW_EN; + mac_control |= CPSW_SL_CTL_RX_FLOW_EN; cpsw_sl_ctl_set(port->slave.mac_sl, mac_control); diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.c b/drivers/net/ethernet/ti/icssg/icss_iep.c index 4cf2a52e4378..3025e9c18970 100644 --- a/drivers/net/ethernet/ti/icssg/icss_iep.c +++ b/drivers/net/ethernet/ti/icssg/icss_iep.c @@ -177,7 +177,7 @@ static void icss_iep_set_counter(struct icss_iep *iep, u64 ns) if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) writel(upper_32_bits(ns), iep->base + iep->plat_data->reg_offs[ICSS_IEP_COUNT_REG1]); - writel(upper_32_bits(ns), iep->base + iep->plat_data->reg_offs[ICSS_IEP_COUNT_REG0]); + writel(lower_32_bits(ns), iep->base + iep->plat_data->reg_offs[ICSS_IEP_COUNT_REG0]); } static void icss_iep_update_to_next_boundary(struct icss_iep *iep, u64 start_ns); diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index 531bf919aef5..e0d26148dfd9 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -163,7 +163,6 @@ typedef void buffer_t; /* Information about built-in Ethernet MAC interfaces */ struct eth_plat_info { - u8 phy; /* MII PHY ID, 0 - 31 */ u8 rxq; /* configurable, currently 0 - 31 only */ u8 txreadyq; u8 hwaddr[ETH_ALEN]; @@ -1583,7 +1582,7 @@ static int ixp4xx_eth_probe(struct platform_device *pdev) if ((err = register_netdev(ndev))) goto err_phy_dis; - netdev_info(ndev, "%s: MII PHY %i on %s\n", ndev->name, plat->phy, + netdev_info(ndev, "%s: MII PHY %s on %s\n", ndev->name, phydev_name(phydev), npe_name(port->npe)); return 0; diff --git a/drivers/net/mdio/acpi_mdio.c b/drivers/net/mdio/acpi_mdio.c index 4630dde01974..5d0f11f280cf 100644 --- a/drivers/net/mdio/acpi_mdio.c +++ b/drivers/net/mdio/acpi_mdio.c @@ -16,6 +16,7 @@ MODULE_AUTHOR("Calvin Johnson <calvin.johnson@oss.nxp.com>"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ACPI MDIO bus (Ethernet PHY) accessors"); /** * __acpi_mdiobus_register - Register mii_bus and create PHYs from the ACPI ASL. 
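The icss_iep change above fixes a classic split-register bug: the driver wrote upper_32_bits(ns) into both the high and the low half of the 64-bit IEP counter, so the low word was lost. Below is a minimal stand-alone sketch of the corrected pattern; the reg_hi/reg_lo variables and the macro definitions are illustrative stand-ins for the driver's ICSS_IEP_COUNT_REG1/REG0 writel() calls, not code taken from the kernel.

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-ins for the kernel's upper_32_bits()/lower_32_bits() helpers. */
    #define upper_32_bits(n) ((uint32_t)((n) >> 32))
    #define lower_32_bits(n) ((uint32_t)((n) & 0xffffffffULL))

    /* Fake 32-bit "registers" in place of COUNT_REG1 (high) / COUNT_REG0 (low). */
    static uint32_t reg_hi, reg_lo;

    static void set_counter(uint64_t ns)
    {
            reg_hi = upper_32_bits(ns); /* high word to the high register ... */
            reg_lo = lower_32_bits(ns); /* ... low word to the low register,
                                         * not upper_32_bits() a second time */
    }

    int main(void)
    {
            set_counter(0x123456789abcdef0ULL);
            /* prints hi=0x12345678 lo=0x9abcdef0 */
            printf("hi=0x%08x lo=0x%08x\n", (unsigned)reg_hi, (unsigned)reg_lo);
            return 0;
    }

The same shape applies to any 64-bit value pushed through a pair of 32-bit MMIO registers, which is why the one-character copy-paste slip (upper vs. lower) is easy to make and easy to miss.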
diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c index 1183ef5e203e..fd02f5cbc853 100644 --- a/drivers/net/mdio/fwnode_mdio.c +++ b/drivers/net/mdio/fwnode_mdio.c @@ -14,6 +14,7 @@ MODULE_AUTHOR("Calvin Johnson <calvin.johnson@oss.nxp.com>"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("FWNODE MDIO bus (Ethernet PHY) accessors"); static struct pse_control * fwnode_find_pse_control(struct fwnode_handle *fwnode) diff --git a/drivers/net/mdio/mdio-aspeed.c b/drivers/net/mdio/mdio-aspeed.c index 70edeeb7771e..c2170650415c 100644 --- a/drivers/net/mdio/mdio-aspeed.c +++ b/drivers/net/mdio/mdio-aspeed.c @@ -205,3 +205,4 @@ module_platform_driver(aspeed_mdio_driver); MODULE_AUTHOR("Andrew Jeffery <andrew@aj.id.au>"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ASPEED MDIO bus controller"); diff --git a/drivers/net/mdio/mdio-bitbang.c b/drivers/net/mdio/mdio-bitbang.c index 81b7748c10ce..f88639297ff2 100644 --- a/drivers/net/mdio/mdio-bitbang.c +++ b/drivers/net/mdio/mdio-bitbang.c @@ -263,3 +263,4 @@ void free_mdio_bitbang(struct mii_bus *bus) EXPORT_SYMBOL(free_mdio_bitbang); MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Bitbanged MDIO buses"); diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c index 7eb32ebb846d..64ebcb6d235c 100644 --- a/drivers/net/mdio/of_mdio.c +++ b/drivers/net/mdio/of_mdio.c @@ -25,6 +25,7 @@ MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("OpenFirmware MDIO bus (Ethernet PHY) accessors"); /* Extract the clause 22 phy ID from the compatible string of the form * ethernet-phy-idAAAA.BBBB */ diff --git a/drivers/net/phy/bcm-phy-ptp.c b/drivers/net/phy/bcm-phy-ptp.c index ef00d6163061..cb4b91af5e17 100644 --- a/drivers/net/phy/bcm-phy-ptp.c +++ b/drivers/net/phy/bcm-phy-ptp.c @@ -942,3 +942,4 @@ struct bcm_ptp_private *bcm_ptp_probe(struct phy_device *phydev) EXPORT_SYMBOL_GPL(bcm_ptp_probe); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Broadcom PHY PTP driver"); diff --git a/drivers/net/phy/bcm87xx.c b/drivers/net/phy/bcm87xx.c index cc2858107668..e81404bf8994 100644 --- a/drivers/net/phy/bcm87xx.c +++ b/drivers/net/phy/bcm87xx.c @@ -223,3 +223,4 @@ static struct phy_driver bcm87xx_driver[] = { module_phy_driver(bcm87xx_driver); MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Broadcom BCM87xx PHY driver"); diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 6712883498bb..25c19496a336 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1616,6 +1616,7 @@ struct phylink *phylink_create(struct phylink_config *config, pl->config = config; if (config->type == PHYLINK_NETDEV) { pl->netdev = to_net_dev(config->dev); + netif_carrier_off(pl->netdev); } else if (config->type == PHYLINK_DEV) { pl->dev = config->dev; } else { @@ -3726,3 +3727,4 @@ static int __init phylink_init(void) module_init(phylink_init); MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("phylink models the MAC to optional PHY connection"); diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index b8c0961daf53..5468bd209fab 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -3153,3 +3153,4 @@ module_exit(sfp_exit); MODULE_ALIAS("platform:sfp"); MODULE_AUTHOR("Russell King"); MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("SFP cage support"); diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index a9beacd552cf..0193af2d31c9 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -570,8 +570,8 @@ static struct 
bpf_prog *get_filter(struct sock_fprog *uprog) /* uprog->len is unsigned short, so no overflow here */ fprog.len = uprog->len; - fprog.filter = memdup_user(uprog->filter, - uprog->len * sizeof(struct sock_filter)); + fprog.filter = memdup_array_user(uprog->filter, + uprog->len, sizeof(struct sock_filter)); if (IS_ERR(fprog.filter)) return ERR_CAST(fprog.filter); diff --git a/drivers/nvme/Makefile b/drivers/nvme/Makefile index eedca8c72098..74f59ceed3d5 100644 --- a/drivers/nvme/Makefile +++ b/drivers/nvme/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_NVME_COMMON) += common/ +obj-y += common/ obj-y += host/ obj-y += target/ diff --git a/drivers/nvme/common/Kconfig b/drivers/nvme/common/Kconfig index 06c8df00d1e2..244432e0b73d 100644 --- a/drivers/nvme/common/Kconfig +++ b/drivers/nvme/common/Kconfig @@ -1,14 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only -config NVME_COMMON - tristate - config NVME_KEYRING - bool + tristate select KEYS config NVME_AUTH - bool + tristate select CRYPTO select CRYPTO_HMAC select CRYPTO_SHA256 diff --git a/drivers/nvme/common/Makefile b/drivers/nvme/common/Makefile index 0cbd0b0b8d49..681514cf2e2f 100644 --- a/drivers/nvme/common/Makefile +++ b/drivers/nvme/common/Makefile @@ -2,7 +2,8 @@ ccflags-y += -I$(src) -obj-$(CONFIG_NVME_COMMON) += nvme-common.o +obj-$(CONFIG_NVME_AUTH) += nvme-auth.o +obj-$(CONFIG_NVME_KEYRING) += nvme-keyring.o -nvme-common-$(CONFIG_NVME_AUTH) += auth.o -nvme-common-$(CONFIG_NVME_KEYRING) += keyring.o +nvme-auth-y += auth.o +nvme-keyring-y += keyring.o diff --git a/drivers/nvme/common/auth.c b/drivers/nvme/common/auth.c index a8e87dfbeab2..a23ab5c968b9 100644 --- a/drivers/nvme/common/auth.c +++ b/drivers/nvme/common/auth.c @@ -341,7 +341,6 @@ int nvme_auth_augmented_challenge(u8 hmac_id, u8 *skey, size_t skey_len, u8 *challenge, u8 *aug, size_t hlen) { struct crypto_shash *tfm; - struct shash_desc *desc; u8 *hashed_key; const char *hmac_name; int ret; @@ -369,29 +368,11 @@ int nvme_auth_augmented_challenge(u8 hmac_id, u8 *skey, size_t skey_len, goto out_free_key; } - desc = kmalloc(sizeof(struct shash_desc) + crypto_shash_descsize(tfm), - GFP_KERNEL); - if (!desc) { - ret = -ENOMEM; - goto out_free_hash; - } - desc->tfm = tfm; - ret = crypto_shash_setkey(tfm, hashed_key, hlen); if (ret) - goto out_free_desc; - - ret = crypto_shash_init(desc); - if (ret) - goto out_free_desc; - - ret = crypto_shash_update(desc, challenge, hlen); - if (ret) - goto out_free_desc; + goto out_free_hash; - ret = crypto_shash_final(desc, aug); -out_free_desc: - kfree_sensitive(desc); + ret = crypto_shash_tfm_digest(tfm, challenge, hlen, aug); out_free_hash: crypto_free_shash(tfm); out_free_key: diff --git a/drivers/nvme/common/keyring.c b/drivers/nvme/common/keyring.c index f8d9a208397b..ee341b83eeba 100644 --- a/drivers/nvme/common/keyring.c +++ b/drivers/nvme/common/keyring.c @@ -151,7 +151,7 @@ key_serial_t nvme_tls_psk_default(struct key *keyring, } EXPORT_SYMBOL_GPL(nvme_tls_psk_default); -int nvme_keyring_init(void) +static int __init nvme_keyring_init(void) { int err; @@ -171,12 +171,15 @@ int nvme_keyring_init(void) } return 0; } -EXPORT_SYMBOL_GPL(nvme_keyring_init); -void nvme_keyring_exit(void) +static void __exit nvme_keyring_exit(void) { unregister_key_type(&nvme_tls_psk_key_type); key_revoke(nvme_keyring); key_put(nvme_keyring); } -EXPORT_SYMBOL_GPL(nvme_keyring_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>"); +module_init(nvme_keyring_init); 
+module_exit(nvme_keyring_exit); diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index 48f7d72de5e9..8fe2dd619e80 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -95,7 +95,6 @@ config NVME_TCP config NVME_TCP_TLS bool "NVMe over Fabrics TCP TLS encryption support" depends on NVME_TCP - select NVME_COMMON select NVME_KEYRING select NET_HANDSHAKE select KEYS @@ -110,7 +109,6 @@ config NVME_TCP_TLS config NVME_HOST_AUTH bool "NVM Express over Fabrics In-Band Authentication" depends on NVME_CORE - select NVME_COMMON select NVME_AUTH help This provides support for NVMe over Fabrics In-Band Authentication. diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index eaefebb2a799..48328e36e93b 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -29,6 +29,7 @@ struct nvme_dhchap_queue_context { int error; u32 s1; u32 s2; + bool bi_directional; u16 transaction; u8 status; u8 dhgroup_id; @@ -312,17 +313,17 @@ static int nvme_auth_set_dhchap_reply_data(struct nvme_ctrl *ctrl, data->dhvlen = cpu_to_le16(chap->host_key_len); memcpy(data->rval, chap->response, chap->hash_len); if (ctrl->ctrl_key) { + chap->bi_directional = true; get_random_bytes(chap->c2, chap->hash_len); data->cvalid = 1; - chap->s2 = nvme_auth_get_seqnum(); memcpy(data->rval + chap->hash_len, chap->c2, chap->hash_len); dev_dbg(ctrl->device, "%s: qid %d ctrl challenge %*ph\n", __func__, chap->qid, (int)chap->hash_len, chap->c2); } else { memset(chap->c2, 0, chap->hash_len); - chap->s2 = 0; } + chap->s2 = nvme_auth_get_seqnum(); data->seqnum = cpu_to_le32(chap->s2); if (chap->host_key_len) { dev_dbg(ctrl->device, "%s: qid %d host public key %*ph\n", @@ -339,10 +340,7 @@ static int nvme_auth_process_dhchap_success1(struct nvme_ctrl *ctrl, struct nvme_dhchap_queue_context *chap) { struct nvmf_auth_dhchap_success1_data *data = chap->buf; - size_t size = sizeof(*data); - - if (chap->s2) - size += chap->hash_len; + size_t size = sizeof(*data) + chap->hash_len; if (size > CHAP_BUF_SIZE) { chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD; @@ -663,6 +661,7 @@ static void nvme_auth_reset_dhchap(struct nvme_dhchap_queue_context *chap) chap->error = 0; chap->s1 = 0; chap->s2 = 0; + chap->bi_directional = false; chap->transaction = 0; memset(chap->c1, 0, sizeof(chap->c1)); memset(chap->c2, 0, sizeof(chap->c2)); @@ -825,7 +824,7 @@ static void nvme_queue_auth_work(struct work_struct *work) goto fail2; } - if (chap->s2) { + if (chap->bi_directional) { /* DH-HMAC-CHAP Step 5: send success2 */ dev_dbg(ctrl->device, "%s: qid %d send success2\n", __func__, chap->qid); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 62612f87aafa..88b54cdcbd68 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -25,7 +25,6 @@ #include "nvme.h" #include "fabrics.h" #include <linux/nvme-auth.h> -#include <linux/nvme-keyring.h> #define CREATE_TRACE_POINTS #include "trace.h" @@ -483,6 +482,7 @@ EXPORT_SYMBOL_GPL(nvme_cancel_tagset); void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl) { + nvme_stop_keep_alive(ctrl); if (ctrl->admin_tagset) { blk_mq_tagset_busy_iter(ctrl->admin_tagset, nvme_cancel_request, ctrl); @@ -3200,6 +3200,8 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended) clear_bit(NVME_CTRL_DIRTY_CAPABILITY, &ctrl->flags); ctrl->identified = true; + nvme_start_keep_alive(ctrl); + return 0; } EXPORT_SYMBOL_GPL(nvme_init_ctrl_finish); @@ -4074,8 +4076,21 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl) return; 
if (nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_FW_SLOT, 0, NVME_CSI_NVM, - log, sizeof(*log), 0)) + log, sizeof(*log), 0)) { dev_warn(ctrl->device, "Get FW SLOT INFO log error\n"); + goto out_free_log; + } + + if (log->afi & 0x70 || !(log->afi & 0x7)) { + dev_info(ctrl->device, + "Firmware is activated after next Controller Level Reset\n"); + goto out_free_log; + } + + memcpy(ctrl->subsys->firmware_rev, &log->frs[(log->afi & 0x7) - 1], + sizeof(ctrl->subsys->firmware_rev)); + +out_free_log: kfree(log); } @@ -4333,7 +4348,6 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) { nvme_mpath_stop(ctrl); nvme_auth_stop(ctrl); - nvme_stop_keep_alive(ctrl); nvme_stop_failfast_work(ctrl); flush_work(&ctrl->async_event_work); cancel_work_sync(&ctrl->fw_act_work); @@ -4344,8 +4358,6 @@ EXPORT_SYMBOL_GPL(nvme_stop_ctrl); void nvme_start_ctrl(struct nvme_ctrl *ctrl) { - nvme_start_keep_alive(ctrl); - nvme_enable_aen(ctrl); /* @@ -4724,16 +4736,11 @@ static int __init nvme_core_init(void) result = PTR_ERR(nvme_ns_chr_class); goto unregister_generic_ns; } - result = nvme_keyring_init(); - if (result) - goto destroy_ns_chr; result = nvme_init_auth(); if (result) - goto keyring_exit; + goto destroy_ns_chr; return 0; -keyring_exit: - nvme_keyring_exit(); destroy_ns_chr: class_destroy(nvme_ns_chr_class); unregister_generic_ns: @@ -4757,7 +4764,6 @@ out: static void __exit nvme_core_exit(void) { nvme_exit_auth(); - nvme_keyring_exit(); class_destroy(nvme_ns_chr_class); class_destroy(nvme_subsys_class); class_destroy(nvme_class); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index a15b37750d6e..49c3e46eaa1e 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2530,6 +2530,12 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) * clean up the admin queue. Same thing as above. */ nvme_quiesce_admin_queue(&ctrl->ctrl); + + /* + * Open-coding nvme_cancel_admin_tagset() as fc + * is not using nvme_cancel_request(). 
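+ * That helper also stops the keep-alive work, so do the same by hand here.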
+ */ + nvme_stop_keep_alive(&ctrl->ctrl); blk_sync_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 747c879e8982..529b9954d2b8 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -510,10 +510,13 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); req->bio = pdu->bio; - if (nvme_req(req)->flags & NVME_REQ_CANCELLED) + if (nvme_req(req)->flags & NVME_REQ_CANCELLED) { pdu->nvme_status = -EINTR; - else + } else { pdu->nvme_status = nvme_req(req)->status; + if (!pdu->nvme_status) + pdu->nvme_status = blk_status_to_errno(err); + } pdu->u.result = le64_to_cpu(nvme_req(req)->result.u64); /* diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 4714a902f4ca..89661a9cf850 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1423,13 +1423,14 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) nvme_tcp_queue_id(queue), ret); goto free_icresp; } + ret = -ENOTCONN; if (queue->ctrl->ctrl.opts->tls) { ctype = tls_get_record_type(queue->sock->sk, (struct cmsghdr *)cbuf); if (ctype != TLS_RECORD_TYPE_DATA) { pr_err("queue %d: unhandled TLS record %d\n", nvme_tcp_queue_id(queue), ctype); - return -ENOTCONN; + goto free_icresp; } } ret = -EINVAL; @@ -2236,11 +2237,7 @@ destroy_io: nvme_tcp_destroy_io_queues(ctrl, new); } destroy_admin: - nvme_quiesce_admin_queue(ctrl); - blk_sync_queue(ctrl->admin_q); - nvme_tcp_stop_queue(ctrl, 0); - nvme_cancel_admin_tagset(ctrl); - nvme_tcp_destroy_admin_queue(ctrl, new); + nvme_tcp_teardown_admin_queue(ctrl, false); return ret; } diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index fa479c9f5c3d..31633da9427c 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -87,7 +87,6 @@ config NVME_TARGET_TCP config NVME_TARGET_TCP_TLS bool "NVMe over Fabrics TCP target TLS encryption support" depends on NVME_TARGET_TCP - select NVME_COMMON select NVME_KEYRING select NET_HANDSHAKE select KEYS @@ -102,7 +101,6 @@ config NVME_TARGET_TCP_TLS config NVME_TARGET_AUTH bool "NVMe over Fabrics In-band Authentication support" depends on NVME_TARGET - select NVME_COMMON select NVME_AUTH help This enables support for NVMe over Fabrics In-band Authentication diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c index 1d9854484e2e..eb7785be0ca7 100644 --- a/drivers/nvme/target/fabrics-cmd-auth.c +++ b/drivers/nvme/target/fabrics-cmd-auth.c @@ -163,11 +163,11 @@ static u16 nvmet_auth_reply(struct nvmet_req *req, void *d) pr_debug("%s: ctrl %d qid %d challenge %*ph\n", __func__, ctrl->cntlid, req->sq->qid, data->hl, req->sq->dhchap_c2); - req->sq->dhchap_s2 = le32_to_cpu(data->seqnum); } else { req->sq->authenticated = true; req->sq->dhchap_c2 = NULL; } + req->sq->dhchap_s2 = le32_to_cpu(data->seqnum); return 0; } diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 48d5df054cd0..9cb434c58075 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -466,6 +466,8 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) out_destroy_io: nvme_loop_destroy_io_queues(ctrl); out_destroy_admin: + nvme_quiesce_admin_queue(&ctrl->ctrl); + nvme_cancel_admin_tagset(&ctrl->ctrl); nvme_loop_destroy_admin_queue(ctrl); out_disable: dev_warn(ctrl->ctrl.device, "Removing after reset 
failure\n"); @@ -600,6 +602,8 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, return &ctrl->ctrl; out_remove_admin_queue: + nvme_quiesce_admin_queue(&ctrl->ctrl); + nvme_cancel_admin_tagset(&ctrl->ctrl); nvme_loop_destroy_admin_queue(ctrl); out_free_queues: kfree(ctrl->queues); diff --git a/drivers/of/property.c b/drivers/of/property.c index cf8dacf3e3b8..afdaefbd03f6 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -1267,6 +1267,7 @@ DEFINE_SIMPLE_PROP(resets, "resets", "#reset-cells") DEFINE_SIMPLE_PROP(leds, "leds", NULL) DEFINE_SIMPLE_PROP(backlight, "backlight", NULL) DEFINE_SIMPLE_PROP(panel, "panel", NULL) +DEFINE_SIMPLE_PROP(msi_parent, "msi-parent", "#msi-cells") DEFINE_SUFFIX_PROP(regulators, "-supply", NULL) DEFINE_SUFFIX_PROP(gpio, "-gpio", "#gpio-cells") @@ -1356,6 +1357,7 @@ static const struct supplier_bindings of_supplier_bindings[] = { { .parse_prop = parse_leds, }, { .parse_prop = parse_backlight, }, { .parse_prop = parse_panel, }, + { .parse_prop = parse_msi_parent, }, { .parse_prop = parse_gpio_compat, }, { .parse_prop = parse_interrupts, }, { .parse_prop = parse_regulators, }, diff --git a/drivers/pcmcia/cs.c b/drivers/pcmcia/cs.c index 5658745c398f..b33be1e63c98 100644 --- a/drivers/pcmcia/cs.c +++ b/drivers/pcmcia/cs.c @@ -605,6 +605,7 @@ static int pccardd(void *__skt) dev_warn(&skt->dev, "PCMCIA: unable to register socket\n"); skt->thread = NULL; complete(&skt->thread_done); + put_device(&skt->dev); return 0; } ret = pccard_sysfs_add_socket(&skt->dev); diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index d500e5dbbc3f..b4b8363d1de2 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -513,9 +513,6 @@ static struct pcmcia_device *pcmcia_device_add(struct pcmcia_socket *s, /* by default don't allow DMA */ p_dev->dma_mask = 0; p_dev->dev.dma_mask = &p_dev->dma_mask; - dev_set_name(&p_dev->dev, "%d.%d", p_dev->socket->sock, p_dev->device_no); - if (!dev_name(&p_dev->dev)) - goto err_free; p_dev->devname = kasprintf(GFP_KERNEL, "pcmcia%s", dev_name(&p_dev->dev)); if (!p_dev->devname) goto err_free; @@ -573,8 +570,15 @@ static struct pcmcia_device *pcmcia_device_add(struct pcmcia_socket *s, pcmcia_device_query(p_dev); - if (device_register(&p_dev->dev)) - goto err_unreg; + dev_set_name(&p_dev->dev, "%d.%d", p_dev->socket->sock, p_dev->device_no); + if (device_register(&p_dev->dev)) { + mutex_lock(&s->ops_mutex); + list_del(&p_dev->socket_device_list); + s->device_count--; + mutex_unlock(&s->ops_mutex); + put_device(&p_dev->dev); + return NULL; + } return p_dev; diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index d78091e79a0f..e9e31c638a67 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -684,7 +684,7 @@ EXPORT_SYMBOL(pcmcia_request_io); * pcmcia_request_irq() is a wrapper around request_irq() which allows * the PCMCIA core to clean up the registration in pcmcia_disable_device(). * Drivers are free to use request_irq() directly, but then they need to - * call free_irq() themselfves, too. Also, only %IRQF_SHARED capable IRQ + * call free_irq() themselves, too. Also, only %IRQF_SHARED capable IRQ * handlers are allowed. 
*/ int __must_check pcmcia_request_irq(struct pcmcia_device *p_dev, diff --git a/drivers/pcmcia/tcic.c b/drivers/pcmcia/tcic.c index 1a0e3f098759..5ef888688e23 100644 --- a/drivers/pcmcia/tcic.c +++ b/drivers/pcmcia/tcic.c @@ -435,7 +435,7 @@ static int __init init_tcic(void) } /* Set up polling */ - timer_setup(&poll_timer, &tcic_timer, 0); + timer_setup(&poll_timer, tcic_timer, 0); /* Build interrupt mask */ printk(KERN_CONT ", %d sockets\n", sockets); diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index 42b72042f6b3..2cc35dded007 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -676,6 +676,9 @@ static int arm_cspmu_event_init(struct perf_event *event) cspmu = to_arm_cspmu(event->pmu); + if (event->attr.type != event->pmu->type) + return -ENOENT; + /* * Following other "uncore" PMUs, we do not support sampling mode or * attach to a task (per-process mode). diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 18b91b56af1d..6ca7be05229c 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -428,12 +428,12 @@ static inline bool armv8pmu_event_is_chained(struct perf_event *event) #define ARMV8_IDX_TO_COUNTER(x) \ (((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK) -static inline u32 armv8pmu_pmcr_read(void) +static inline u64 armv8pmu_pmcr_read(void) { return read_pmcr(); } -static inline void armv8pmu_pmcr_write(u32 val) +static inline void armv8pmu_pmcr_write(u64 val) { val &= ARMV8_PMU_PMCR_MASK; isb(); @@ -957,7 +957,7 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, static void armv8pmu_reset(void *info) { struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; - u32 pmcr; + u64 pmcr; /* The counter and interrupt enable registers are unknown at reset. 
*/ armv8pmu_disable_counter(U32_MAX); diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 96c7f670c8f0..16acd4dcdb96 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -22,7 +22,7 @@ #include <asm/errata_list.h> #include <asm/sbi.h> -#include <asm/hwcap.h> +#include <asm/cpufeature.h> #define SYSCTL_NO_USER_ACCESS 0 #define SYSCTL_USER_ACCESS 1 @@ -543,8 +543,7 @@ static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival) if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) && (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT)) - on_each_cpu_mask(mm_cpumask(event->owner->mm), - pmu_sbi_set_scounteren, (void *)event, 1); + pmu_sbi_set_scounteren((void *)event); } static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag) @@ -554,8 +553,7 @@ static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag) if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) && (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT)) - on_each_cpu_mask(mm_cpumask(event->owner->mm), - pmu_sbi_reset_scounteren, (void *)event, 1); + pmu_sbi_reset_scounteren((void *)event); ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0); if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) && @@ -689,6 +687,11 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) /* Firmware counter don't support overflow yet */ fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS); + if (fidx == RISCV_MAX_COUNTERS) { + csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num)); + return IRQ_NONE; + } + event = cpu_hw_evt->events[fidx]; if (!event) { csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num)); diff --git a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c index bc7bb9876e57..fd5ce52d05b1 100644 --- a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c +++ b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c @@ -330,24 +330,22 @@ static const struct irq_chip iproc_gpio_irq_chip = { static int iproc_gpio_request(struct gpio_chip *gc, unsigned offset) { struct iproc_gpio *chip = gpiochip_get_data(gc); - unsigned gpio = gc->base + offset; /* not all Iproc GPIO pins can be muxed individually */ if (!chip->pinmux_is_supported) return 0; - return pinctrl_gpio_request(gpio); + return pinctrl_gpio_request(gc, offset); } static void iproc_gpio_free(struct gpio_chip *gc, unsigned offset) { struct iproc_gpio *chip = gpiochip_get_data(gc); - unsigned gpio = gc->base + offset; if (!chip->pinmux_is_supported) return; - pinctrl_gpio_free(gpio); + pinctrl_gpio_free(gc, offset); } static int iproc_gpio_direction_input(struct gpio_chip *gc, unsigned gpio) diff --git a/drivers/pinctrl/cirrus/pinctrl-cs42l43.c b/drivers/pinctrl/cirrus/pinctrl-cs42l43.c index c09646318419..012b0a3bad5a 100644 --- a/drivers/pinctrl/cirrus/pinctrl-cs42l43.c +++ b/drivers/pinctrl/cirrus/pinctrl-cs42l43.c @@ -506,17 +506,12 @@ static void cs42l43_gpio_set(struct gpio_chip *chip, unsigned int offset, int va pm_runtime_put(priv->dev); } -static int cs42l43_gpio_direction_in(struct gpio_chip *chip, unsigned int offset) -{ - return pinctrl_gpio_direction_input(chip->base + offset); -} - static int cs42l43_gpio_direction_out(struct gpio_chip *chip, unsigned int offset, int value) { cs42l43_gpio_set(chip, offset, value); - return pinctrl_gpio_direction_output(chip->base + offset); + return pinctrl_gpio_direction_output(chip, offset); } static int cs42l43_gpio_add_pin_ranges(struct gpio_chip *chip) @@ -551,7 +546,7 @@ static int cs42l43_pin_probe(struct platform_device *pdev) 
priv->gpio_chip.request = gpiochip_generic_request; priv->gpio_chip.free = gpiochip_generic_free; - priv->gpio_chip.direction_input = cs42l43_gpio_direction_in; + priv->gpio_chip.direction_input = pinctrl_gpio_direction_input; priv->gpio_chip.direction_output = cs42l43_gpio_direction_out; priv->gpio_chip.add_pin_ranges = cs42l43_gpio_add_pin_ranges; priv->gpio_chip.get = cs42l43_gpio_get; diff --git a/drivers/pinctrl/cirrus/pinctrl-lochnagar.c b/drivers/pinctrl/cirrus/pinctrl-lochnagar.c index 0b78cf611afe..014297a3fbd2 100644 --- a/drivers/pinctrl/cirrus/pinctrl-lochnagar.c +++ b/drivers/pinctrl/cirrus/pinctrl-lochnagar.c @@ -1098,7 +1098,7 @@ static int lochnagar_gpio_direction_out(struct gpio_chip *chip, { lochnagar_gpio_set(chip, offset, value); - return pinctrl_gpio_direction_output(chip->base + offset); + return pinctrl_gpio_direction_output(chip, offset); } static int lochnagar_fill_func_groups(struct lochnagar_pin_priv *priv) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 71fc9f95584e..1fa89be29b8f 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -23,6 +23,8 @@ #include <linux/seq_file.h> #include <linux/slab.h> +#include <linux/gpio/driver.h> + #include <linux/pinctrl/consumer.h> #include <linux/pinctrl/devinfo.h> #include <linux/pinctrl/machine.h> @@ -267,7 +269,8 @@ static int pinctrl_register_pins(struct pinctrl_dev *pctldev, /** * gpio_to_pin() - GPIO range GPIO number to pin number translation * @range: GPIO range used for the translation - * @gpio: gpio pin to translate to a pin number + * @gc: GPIO chip structure from the GPIO subsystem + * @offset: hardware offset of the GPIO relative to the controller * * Finds the pin number for a given GPIO using the specified GPIO range * as a base for translation. The distinction between linear GPIO ranges @@ -278,25 +281,27 @@ static int pinctrl_register_pins(struct pinctrl_dev *pctldev, * result of successful pinctrl_get_device_gpio_range calls)! 
*/ static inline int gpio_to_pin(struct pinctrl_gpio_range *range, - unsigned int gpio) + struct gpio_chip *gc, unsigned int offset) { - unsigned int offset = gpio - range->base; + unsigned int pin = gc->base + offset - range->base; if (range->pins) - return range->pins[offset]; + return range->pins[pin]; else - return range->pin_base + offset; + return range->pin_base + pin; } /** * pinctrl_match_gpio_range() - check if a certain GPIO pin is in range * @pctldev: pin controller device to check - * @gpio: gpio pin to check taken from the global GPIO pin space + * @gc: GPIO chip structure from the GPIO subsystem + * @offset: hardware offset of the GPIO relative to the controller * * Tries to match a GPIO pin number to the ranges handled by a certain pin * controller, return the range or NULL */ static struct pinctrl_gpio_range * -pinctrl_match_gpio_range(struct pinctrl_dev *pctldev, unsigned gpio) +pinctrl_match_gpio_range(struct pinctrl_dev *pctldev, struct gpio_chip *gc, + unsigned int offset) { struct pinctrl_gpio_range *range; @@ -304,8 +309,8 @@ pinctrl_match_gpio_range(struct pinctrl_dev *pctldev, unsigned gpio) /* Loop over the ranges */ list_for_each_entry(range, &pctldev->gpio_ranges, node) { /* Check if we're in the valid range */ - if (gpio >= range->base && - gpio < range->base + range->npins) { + if ((gc->base + offset) >= range->base && + (gc->base + offset) < range->base + range->npins) { mutex_unlock(&pctldev->mutex); return range; } @@ -317,7 +322,8 @@ pinctrl_match_gpio_range(struct pinctrl_dev *pctldev, unsigned gpio) /** * pinctrl_ready_for_gpio_range() - check if other GPIO pins of * the same GPIO chip are in range - * @gpio: gpio pin to check taken from the global GPIO pin space + * @gc: GPIO chip structure from the GPIO subsystem + * @offset: hardware offset of the GPIO relative to the controller * * This function is complement of pinctrl_match_gpio_range(). If the return * value of pinctrl_match_gpio_range() is NULL, this function could be used @@ -328,19 +334,11 @@ pinctrl_match_gpio_range(struct pinctrl_dev *pctldev, unsigned gpio) * is false, it means that pinctrl device may not be ready. */ #ifdef CONFIG_GPIOLIB -static bool pinctrl_ready_for_gpio_range(unsigned gpio) +static bool pinctrl_ready_for_gpio_range(struct gpio_chip *gc, + unsigned int offset) { struct pinctrl_dev *pctldev; struct pinctrl_gpio_range *range = NULL; - /* - * FIXME: "gpio" here is a number in the global GPIO numberspace. - * get rid of this from the ranges eventually and get the GPIO - * descriptor from the gpio_chip. 
- */ - struct gpio_chip *chip = gpiod_to_chip(gpio_to_desc(gpio)); - - if (WARN(!chip, "no gpio_chip for gpio%i?", gpio)) - return false; mutex_lock(&pinctrldev_list_mutex); @@ -350,8 +348,8 @@ static bool pinctrl_ready_for_gpio_range(unsigned gpio) mutex_lock(&pctldev->mutex); list_for_each_entry(range, &pctldev->gpio_ranges, node) { /* Check if any gpio range overlapped with gpio chip */ - if (range->base + range->npins - 1 < chip->base || - range->base > chip->base + chip->ngpio - 1) + if (range->base + range->npins - 1 < gc->base || + range->base > gc->base + gc->ngpio - 1) continue; mutex_unlock(&pctldev->mutex); mutex_unlock(&pinctrldev_list_mutex); @@ -365,12 +363,17 @@ static bool pinctrl_ready_for_gpio_range(unsigned gpio) return false; } #else -static bool pinctrl_ready_for_gpio_range(unsigned gpio) { return true; } +static inline bool +pinctrl_ready_for_gpio_range(struct gpio_chip *gc, unsigned int offset) +{ + return true; +} #endif /** * pinctrl_get_device_gpio_range() - find device for GPIO range - * @gpio: the pin to locate the pin controller for + * @gc: GPIO chip structure from the GPIO subsystem + * @offset: hardware offset of the GPIO relative to the controller * @outdev: the pin control device if found * @outrange: the GPIO range if found * @@ -379,7 +382,8 @@ static bool pinctrl_ready_for_gpio_range(unsigned gpio) { return true; } * -EPROBE_DEFER if the GPIO range could not be found in any device since it * may still have not been registered. */ -static int pinctrl_get_device_gpio_range(unsigned gpio, +static int pinctrl_get_device_gpio_range(struct gpio_chip *gc, + unsigned int offset, struct pinctrl_dev **outdev, struct pinctrl_gpio_range **outrange) { @@ -391,7 +395,7 @@ static int pinctrl_get_device_gpio_range(unsigned gpio, list_for_each_entry(pctldev, &pinctrldev_list, node) { struct pinctrl_gpio_range *range; - range = pinctrl_match_gpio_range(pctldev, gpio); + range = pinctrl_match_gpio_range(pctldev, gc, offset); if (range) { *outdev = pctldev; *outrange = range; @@ -753,7 +757,7 @@ int pinctrl_get_group_selector(struct pinctrl_dev *pctldev, return -EINVAL; } -bool pinctrl_gpio_can_use_line(unsigned gpio) +bool pinctrl_gpio_can_use_line(struct gpio_chip *gc, unsigned int offset) { struct pinctrl_dev *pctldev; struct pinctrl_gpio_range *range; @@ -765,13 +769,13 @@ bool pinctrl_gpio_can_use_line(unsigned gpio) * we're probably dealing with GPIO driver * without a backing pin controller - bail out. */ - if (pinctrl_get_device_gpio_range(gpio, &pctldev, &range)) + if (pinctrl_get_device_gpio_range(gc, offset, &pctldev, &range)) return true; mutex_lock(&pctldev->mutex); /* Convert to the pin controllers number space */ - pin = gpio_to_pin(range, gpio); + pin = gpio_to_pin(range, gc, offset); result = pinmux_can_be_used_for_gpio(pctldev, pin); @@ -783,22 +787,22 @@ EXPORT_SYMBOL_GPL(pinctrl_gpio_can_use_line); /** * pinctrl_gpio_request() - request a single pin to be used as GPIO - * @gpio: the GPIO pin number from the GPIO subsystem number space + * @gc: GPIO chip structure from the GPIO subsystem + * @offset: hardware offset of the GPIO relative to the controller * * This function should *ONLY* be used from gpiolib-based GPIO drivers, * as part of their gpio_request() semantics, platforms and individual drivers * shall *NOT* request GPIO pins to be muxed in. 
*/ -int pinctrl_gpio_request(unsigned gpio) +int pinctrl_gpio_request(struct gpio_chip *gc, unsigned int offset) { - struct pinctrl_dev *pctldev; struct pinctrl_gpio_range *range; - int ret; - int pin; + struct pinctrl_dev *pctldev; + int ret, pin; - ret = pinctrl_get_device_gpio_range(gpio, &pctldev, &range); + ret = pinctrl_get_device_gpio_range(gc, offset, &pctldev, &range); if (ret) { - if (pinctrl_ready_for_gpio_range(gpio)) + if (pinctrl_ready_for_gpio_range(gc, offset)) ret = 0; return ret; } @@ -806,9 +810,9 @@ int pinctrl_gpio_request(unsigned gpio) mutex_lock(&pctldev->mutex); /* Convert to the pin controllers number space */ - pin = gpio_to_pin(range, gpio); + pin = gpio_to_pin(range, gc, offset); - ret = pinmux_request_gpio(pctldev, range, pin, gpio); + ret = pinmux_request_gpio(pctldev, range, pin, gc->base + offset); mutex_unlock(&pctldev->mutex); @@ -818,27 +822,27 @@ EXPORT_SYMBOL_GPL(pinctrl_gpio_request); /** * pinctrl_gpio_free() - free control on a single pin, currently used as GPIO - * @gpio: the GPIO pin number from the GPIO subsystem number space + * @gc: GPIO chip structure from the GPIO subsystem + * @offset: hardware offset of the GPIO relative to the controller * * This function should *ONLY* be used from gpiolib-based GPIO drivers, * as part of their gpio_free() semantics, platforms and individual drivers * shall *NOT* request GPIO pins to be muxed out. */ -void pinctrl_gpio_free(unsigned gpio) +void pinctrl_gpio_free(struct gpio_chip *gc, unsigned int offset) { - struct pinctrl_dev *pctldev; struct pinctrl_gpio_range *range; - int ret; - int pin; + struct pinctrl_dev *pctldev; + int ret, pin; - ret = pinctrl_get_device_gpio_range(gpio, &pctldev, &range); - if (ret) { + ret = pinctrl_get_device_gpio_range(gc, offset, &pctldev, &range); + if (ret) return; - } + mutex_lock(&pctldev->mutex); /* Convert to the pin controllers number space */ - pin = gpio_to_pin(range, gpio); + pin = gpio_to_pin(range, gc, offset); pinmux_free_gpio(pctldev, pin, range); @@ -846,14 +850,15 @@ } EXPORT_SYMBOL_GPL(pinctrl_gpio_free); -static int pinctrl_gpio_direction(unsigned gpio, bool input) +static int pinctrl_gpio_direction(struct gpio_chip *gc, unsigned int offset, + bool input) { struct pinctrl_dev *pctldev; struct pinctrl_gpio_range *range; int ret; int pin; - ret = pinctrl_get_device_gpio_range(gpio, &pctldev, &range); + ret = pinctrl_get_device_gpio_range(gc, offset, &pctldev, &range); if (ret) { return ret; } @@ -861,7 +866,7 @@ static int pinctrl_gpio_direction(unsigned gpio, bool input) mutex_lock(&pctldev->mutex); /* Convert to the pin controllers number space */ - pin = gpio_to_pin(range, gpio); + pin = gpio_to_pin(range, gc, offset); ret = pinmux_gpio_direction(pctldev, range, pin, input); mutex_unlock(&pctldev->mutex); @@ -871,54 +876,58 @@ /** * pinctrl_gpio_direction_input() - request a GPIO pin to go into input mode - * @gpio: the GPIO pin number from the GPIO subsystem number space + * @gc: GPIO chip structure from the GPIO subsystem + * @offset: hardware offset of the GPIO relative to the controller * * This function should *ONLY* be used from gpiolib-based GPIO drivers, * as part of their gpio_direction_input() semantics, platforms and individual * drivers shall *NOT* touch pin control GPIO calls. 
*/ -int pinctrl_gpio_direction_input(unsigned gpio) +int pinctrl_gpio_direction_input(struct gpio_chip *gc, unsigned int offset) { - return pinctrl_gpio_direction(gpio, true); + return pinctrl_gpio_direction(gc, offset, true); } EXPORT_SYMBOL_GPL(pinctrl_gpio_direction_input); /** * pinctrl_gpio_direction_output() - request a GPIO pin to go into output mode - * @gpio: the GPIO pin number from the GPIO subsystem number space + * @gc: GPIO chip structure from the GPIO subsystem + * @offset: hardware offset of the GPIO relative to the controller * * This function should *ONLY* be used from gpiolib-based GPIO drivers, * as part of their gpio_direction_output() semantics, platforms and individual * drivers shall *NOT* touch pin control GPIO calls. */ -int pinctrl_gpio_direction_output(unsigned gpio) +int pinctrl_gpio_direction_output(struct gpio_chip *gc, unsigned int offset) { - return pinctrl_gpio_direction(gpio, false); + return pinctrl_gpio_direction(gc, offset, false); } EXPORT_SYMBOL_GPL(pinctrl_gpio_direction_output); /** * pinctrl_gpio_set_config() - Apply config to given GPIO pin - * @gpio: the GPIO pin number from the GPIO subsystem number space + * @gc: GPIO chip structure from the GPIO subsystem + * @offset: hardware offset of the GPIO relative to the controller * @config: the configuration to apply to the GPIO * * This function should *ONLY* be used from gpiolib-based GPIO drivers, if * they need to call the underlying pin controller to change GPIO config * (for example set debounce time). */ -int pinctrl_gpio_set_config(unsigned gpio, unsigned long config) +int pinctrl_gpio_set_config(struct gpio_chip *gc, unsigned int offset, + unsigned long config) { unsigned long configs[] = { config }; struct pinctrl_gpio_range *range; struct pinctrl_dev *pctldev; int ret, pin; - ret = pinctrl_get_device_gpio_range(gpio, &pctldev, &range); + ret = pinctrl_get_device_gpio_range(gc, offset, &pctldev, &range); if (ret) return ret; mutex_lock(&pctldev->mutex); - pin = gpio_to_pin(range, gpio); + pin = gpio_to_pin(range, gc, offset); ret = pinconf_set_config(pctldev, pin, configs, ARRAY_SIZE(configs)); mutex_unlock(&pctldev->mutex); diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index b1d8f6136f99..067b0d344f0e 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1148,14 +1148,14 @@ static int chv_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) static int chv_gpio_direction_input(struct gpio_chip *chip, unsigned int offset) { - return pinctrl_gpio_direction_input(chip->base + offset); + return pinctrl_gpio_direction_input(chip, offset); } static int chv_gpio_direction_output(struct gpio_chip *chip, unsigned int offset, int value) { chv_gpio_set(chip, offset, value); - return pinctrl_gpio_direction_output(chip->base + offset); + return pinctrl_gpio_direction_output(chip, offset); } static const struct gpio_chip chv_gpio_chip = { diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index b19527a8728e..652ba451f885 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -992,14 +992,14 @@ static int intel_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) static int intel_gpio_direction_input(struct gpio_chip *chip, unsigned int offset) { - return pinctrl_gpio_direction_input(chip->base + offset); + return pinctrl_gpio_direction_input(chip, offset); } static int 
intel_gpio_direction_output(struct gpio_chip *chip, unsigned int offset, int value) { intel_gpio_set(chip, offset, value); - return pinctrl_gpio_direction_output(chip->base + offset); + return pinctrl_gpio_direction_output(chip, offset); } static const struct gpio_chip intel_gpio_chip = { diff --git a/drivers/pinctrl/intel/pinctrl-lynxpoint.c b/drivers/pinctrl/intel/pinctrl-lynxpoint.c index d7bc9ef29fcc..e6878e4cf20c 100644 --- a/drivers/pinctrl/intel/pinctrl-lynxpoint.c +++ b/drivers/pinctrl/intel/pinctrl-lynxpoint.c @@ -541,7 +541,7 @@ static void lp_gpio_set(struct gpio_chip *chip, unsigned int offset, int value) static int lp_gpio_direction_input(struct gpio_chip *chip, unsigned int offset) { - return pinctrl_gpio_direction_input(chip->base + offset); + return pinctrl_gpio_direction_input(chip, offset); } static int lp_gpio_direction_output(struct gpio_chip *chip, unsigned int offset, @@ -549,7 +549,7 @@ static int lp_gpio_direction_output(struct gpio_chip *chip, unsigned int offset, { lp_gpio_set(chip, offset, value); - return pinctrl_gpio_direction_output(chip->base + offset); + return pinctrl_gpio_direction_output(chip, offset); } static int lp_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) diff --git a/drivers/pinctrl/mediatek/pinctrl-moore.c b/drivers/pinctrl/mediatek/pinctrl-moore.c index 889469c7ac26..c3d59eddd994 100644 --- a/drivers/pinctrl/mediatek/pinctrl-moore.c +++ b/drivers/pinctrl/mediatek/pinctrl-moore.c @@ -510,17 +510,12 @@ static void mtk_gpio_set(struct gpio_chip *chip, unsigned int gpio, int value) mtk_hw_set_value(hw, desc, PINCTRL_PIN_REG_DO, !!value); } -static int mtk_gpio_direction_input(struct gpio_chip *chip, unsigned int gpio) -{ - return pinctrl_gpio_direction_input(chip->base + gpio); -} - static int mtk_gpio_direction_output(struct gpio_chip *chip, unsigned int gpio, int value) { mtk_gpio_set(chip, gpio, value); - return pinctrl_gpio_direction_output(chip->base + gpio); + return pinctrl_gpio_direction_output(chip, gpio); } static int mtk_gpio_to_irq(struct gpio_chip *chip, unsigned int offset) @@ -569,7 +564,7 @@ static int mtk_build_gpiochip(struct mtk_pinctrl *hw) chip->parent = hw->dev; chip->request = gpiochip_generic_request; chip->free = gpiochip_generic_free; - chip->direction_input = mtk_gpio_direction_input; + chip->direction_input = pinctrl_gpio_direction_input; chip->direction_output = mtk_gpio_direction_output; chip->get = mtk_gpio_get; chip->set = mtk_gpio_set; diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index 74b15952b742..e79d66a04194 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -808,17 +808,11 @@ static const struct pinmux_ops mtk_pmx_ops = { .gpio_request_enable = mtk_pmx_gpio_request_enable, }; -static int mtk_gpio_direction_input(struct gpio_chip *chip, - unsigned offset) -{ - return pinctrl_gpio_direction_input(chip->base + offset); -} - static int mtk_gpio_direction_output(struct gpio_chip *chip, unsigned offset, int value) { mtk_gpio_set(chip, offset, value); - return pinctrl_gpio_direction_output(chip->base + offset); + return pinctrl_gpio_direction_output(chip, offset); } static int mtk_gpio_get_direction(struct gpio_chip *chip, unsigned offset) @@ -898,7 +892,7 @@ static const struct gpio_chip mtk_gpio_chip = { .request = gpiochip_generic_request, .free = gpiochip_generic_free, .get_direction = mtk_gpio_get_direction, - .direction_input = mtk_gpio_direction_input, + .direction_input 
= pinctrl_gpio_direction_input, .direction_output = mtk_gpio_direction_output, .get = mtk_gpio_get, .set = mtk_gpio_set, diff --git a/drivers/pinctrl/mediatek/pinctrl-paris.c b/drivers/pinctrl/mediatek/pinctrl-paris.c index b7cb5a1f1060..6392f1e05d02 100644 --- a/drivers/pinctrl/mediatek/pinctrl-paris.c +++ b/drivers/pinctrl/mediatek/pinctrl-paris.c @@ -916,7 +916,7 @@ static int mtk_gpio_direction_input(struct gpio_chip *chip, unsigned int gpio) if (gpio >= hw->soc->npins) return -EINVAL; - return pinctrl_gpio_direction_input(chip->base + gpio); + return pinctrl_gpio_direction_input(chip, gpio); } static int mtk_gpio_direction_output(struct gpio_chip *chip, unsigned int gpio, @@ -929,7 +929,7 @@ static int mtk_gpio_direction_output(struct gpio_chip *chip, unsigned int gpio, mtk_gpio_set(chip, gpio, value); - return pinctrl_gpio_direction_output(chip->base + gpio); + return pinctrl_gpio_direction_output(chip, gpio); } static int mtk_gpio_to_irq(struct gpio_chip *chip, unsigned int offset) diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c index 8bdd0124e2eb..1e658721aaba 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c +++ b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c @@ -171,7 +171,7 @@ static int npcmgpio_direction_input(struct gpio_chip *chip, unsigned int offset) struct npcm7xx_gpio *bank = gpiochip_get_data(chip); int ret; - ret = pinctrl_gpio_direction_input(offset + chip->base); + ret = pinctrl_gpio_direction_input(chip, offset); if (ret) return ret; @@ -188,7 +188,7 @@ static int npcmgpio_direction_output(struct gpio_chip *chip, dev_dbg(chip->parent, "gpio_direction_output: offset%d = %x\n", offset, value); - ret = pinctrl_gpio_direction_output(offset + chip->base); + ret = pinctrl_gpio_direction_output(chip, offset); if (ret) return ret; @@ -201,19 +201,13 @@ static int npcmgpio_gpio_request(struct gpio_chip *chip, unsigned int offset) int ret; dev_dbg(chip->parent, "gpio_request: offset%d\n", offset); - ret = pinctrl_gpio_request(offset + chip->base); + ret = pinctrl_gpio_request(chip, offset); if (ret) return ret; return bank->request(chip, offset); } -static void npcmgpio_gpio_free(struct gpio_chip *chip, unsigned int offset) -{ - dev_dbg(chip->parent, "gpio_free: offset%d\n", offset); - pinctrl_gpio_free(offset + chip->base); -} - static void npcmgpio_irq_handler(struct irq_desc *desc) { struct gpio_chip *gc; @@ -1916,7 +1910,7 @@ static int npcm7xx_gpio_of(struct npcm7xx_pinctrl *pctrl) pctrl->gpio_bank[id].gc.direction_output = npcmgpio_direction_output; pctrl->gpio_bank[id].request = pctrl->gpio_bank[id].gc.request; pctrl->gpio_bank[id].gc.request = npcmgpio_gpio_request; - pctrl->gpio_bank[id].gc.free = npcmgpio_gpio_free; + pctrl->gpio_bank[id].gc.free = pinctrl_gpio_free; id++; } diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c index da21f6a45888..a377d36b0eb0 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c +++ b/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c @@ -173,7 +173,7 @@ static int npcmgpio_direction_input(struct gpio_chip *chip, unsigned int offset) struct npcm8xx_gpio *bank = gpiochip_get_data(chip); int ret; - ret = pinctrl_gpio_direction_input(offset + chip->base); + ret = pinctrl_gpio_direction_input(chip, offset); if (ret) return ret; @@ -186,7 +186,7 @@ static int npcmgpio_direction_output(struct gpio_chip *chip, struct npcm8xx_gpio *bank = gpiochip_get_data(chip); int ret; - ret = pinctrl_gpio_direction_output(offset + chip->base); + ret = 
pinctrl_gpio_direction_output(chip, offset); if (ret) return ret; @@ -198,18 +198,13 @@ static int npcmgpio_gpio_request(struct gpio_chip *chip, unsigned int offset) struct npcm8xx_gpio *bank = gpiochip_get_data(chip); int ret; - ret = pinctrl_gpio_request(offset + chip->base); + ret = pinctrl_gpio_request(chip, offset); if (ret) return ret; return bank->request(chip, offset); } -static void npcmgpio_gpio_free(struct gpio_chip *chip, unsigned int offset) -{ - pinctrl_gpio_free(offset + chip->base); -} - static void npcmgpio_irq_handler(struct irq_desc *desc) { unsigned long sts, en, bit; @@ -2388,7 +2383,7 @@ static int npcm8xx_gpio_fw(struct npcm8xx_pinctrl *pctrl) pctrl->gpio_bank[id].gc.direction_output = npcmgpio_direction_output; pctrl->gpio_bank[id].request = pctrl->gpio_bank[id].gc.request; pctrl->gpio_bank[id].gc.request = npcmgpio_gpio_request; - pctrl->gpio_bank[id].gc.free = npcmgpio_gpio_free; + pctrl->gpio_bank[id].gc.free = pinctrl_gpio_free; for (i = 0 ; i < NPCM8XX_DEBOUNCE_MAX ; i++) pctrl->gpio_bank[id].debounce.set_val[i] = false; pctrl->gpio_bank[id].gc.add_pin_ranges = npcmgpio_add_pin_ranges; diff --git a/drivers/pinctrl/pinctrl-as3722.c b/drivers/pinctrl/pinctrl-as3722.c index 84b47a6cc3a6..6a5f23cf7a2a 100644 --- a/drivers/pinctrl/pinctrl-as3722.c +++ b/drivers/pinctrl/pinctrl-as3722.c @@ -500,16 +500,11 @@ static void as3722_gpio_set(struct gpio_chip *chip, unsigned offset, "GPIO_SIGNAL_OUT_REG update failed: %d\n", ret); } -static int as3722_gpio_direction_input(struct gpio_chip *chip, unsigned offset) -{ - return pinctrl_gpio_direction_input(chip->base + offset); -} - static int as3722_gpio_direction_output(struct gpio_chip *chip, unsigned offset, int value) { as3722_gpio_set(chip, offset, value); - return pinctrl_gpio_direction_output(chip->base + offset); + return pinctrl_gpio_direction_output(chip, offset); } static int as3722_gpio_to_irq(struct gpio_chip *chip, unsigned offset) @@ -526,7 +521,7 @@ static const struct gpio_chip as3722_gpio_chip = { .free = gpiochip_generic_free, .get = as3722_gpio_get, .set = as3722_gpio_set, - .direction_input = as3722_gpio_direction_input, + .direction_input = pinctrl_gpio_direction_input, .direction_output = as3722_gpio_direction_output, .to_irq = as3722_gpio_to_irq, .can_sleep = true, diff --git a/drivers/pinctrl/pinctrl-axp209.c b/drivers/pinctrl/pinctrl-axp209.c index 9f5b3ab8e184..2b4805e74eed 100644 --- a/drivers/pinctrl/pinctrl-axp209.c +++ b/drivers/pinctrl/pinctrl-axp209.c @@ -124,11 +124,6 @@ static int axp20x_gpio_get_reg(unsigned int offset) return -EINVAL; } -static int axp20x_gpio_input(struct gpio_chip *chip, unsigned int offset) -{ - return pinctrl_gpio_direction_input(chip->base + offset); -} - static int axp20x_gpio_get(struct gpio_chip *chip, unsigned int offset) { struct axp20x_pctl *pctl = gpiochip_get_data(chip); @@ -474,7 +469,7 @@ static int axp20x_pctl_probe(struct platform_device *pdev) pctl->chip.get = axp20x_gpio_get; pctl->chip.get_direction = axp20x_gpio_get_direction; pctl->chip.set = axp20x_gpio_set; - pctl->chip.direction_input = axp20x_gpio_input; + pctl->chip.direction_input = pinctrl_gpio_direction_input; pctl->chip.direction_output = axp20x_gpio_output; pctl->desc = of_device_get_match_data(dev); diff --git a/drivers/pinctrl/pinctrl-cy8c95x0.c b/drivers/pinctrl/pinctrl-cy8c95x0.c index fae80b52a6fc..04285c930e94 100644 --- a/drivers/pinctrl/pinctrl-cy8c95x0.c +++ b/drivers/pinctrl/pinctrl-cy8c95x0.c @@ -554,7 +554,7 @@ out: static int cy8c95x0_gpio_direction_input(struct gpio_chip *gc, 
unsigned int off) { - return pinctrl_gpio_direction_input(gc->base + off); + return pinctrl_gpio_direction_input(gc, off); } static int cy8c95x0_gpio_direction_output(struct gpio_chip *gc, @@ -571,7 +571,7 @@ static int cy8c95x0_gpio_direction_output(struct gpio_chip *gc, if (ret) return ret; - return pinctrl_gpio_direction_output(gc->base + off); + return pinctrl_gpio_direction_output(gc, off); } static int cy8c95x0_gpio_get_value(struct gpio_chip *gc, unsigned int off) diff --git a/drivers/pinctrl/pinctrl-ingenic.c b/drivers/pinctrl/pinctrl-ingenic.c index 2f220a47b749..ee718f6e2556 100644 --- a/drivers/pinctrl/pinctrl-ingenic.c +++ b/drivers/pinctrl/pinctrl-ingenic.c @@ -133,6 +133,8 @@ struct ingenic_pinctrl { struct pinctrl_pin_desc *pdesc; const struct ingenic_chip_info *info; + + struct gpio_chip *gc; }; struct ingenic_gpio_chip { @@ -3558,17 +3560,11 @@ static int ingenic_gpio_get(struct gpio_chip *gc, unsigned int offset) return (int) ingenic_gpio_get_value(jzgc, offset); } -static int ingenic_gpio_direction_input(struct gpio_chip *gc, - unsigned int offset) -{ - return pinctrl_gpio_direction_input(gc->base + offset); -} - static int ingenic_gpio_direction_output(struct gpio_chip *gc, unsigned int offset, int value) { ingenic_gpio_set(gc, offset, value); - return pinctrl_gpio_direction_output(gc->base + offset); + return pinctrl_gpio_direction_output(gc, offset); } static inline void ingenic_config_pin(struct ingenic_pinctrl *jzpc, @@ -3678,7 +3674,7 @@ static int ingenic_gpio_irq_request(struct irq_data *data) irq_hw_number_t irq = irqd_to_hwirq(data); int ret; - ret = ingenic_gpio_direction_input(gpio_chip, irq); + ret = pinctrl_gpio_direction_input(gpio_chip, irq); if (ret) return ret; @@ -4052,7 +4048,8 @@ static int ingenic_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, break; case PIN_CONFIG_OUTPUT: - ret = pinctrl_gpio_direction_output(pin); + ret = pinctrl_gpio_direction_output(jzpc->gc, + pin - jzpc->gc->base); if (ret) return ret; @@ -4172,6 +4169,8 @@ static int __init ingenic_gpio_probe(struct ingenic_pinctrl *jzpc, if (!jzgc) return -ENOMEM; + jzpc->gc = &jzgc->gc; + jzgc->jzpc = jzpc; jzgc->reg_base = bank * jzpc->info->reg_offset; @@ -4192,7 +4191,7 @@ static int __init ingenic_gpio_probe(struct ingenic_pinctrl *jzpc, jzgc->gc.set = ingenic_gpio_set; jzgc->gc.get = ingenic_gpio_get; - jzgc->gc.direction_input = ingenic_gpio_direction_input; + jzgc->gc.direction_input = pinctrl_gpio_direction_input; jzgc->gc.direction_output = ingenic_gpio_direction_output; jzgc->gc.get_direction = ingenic_gpio_get_direction; jzgc->gc.request = gpiochip_generic_request; diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c index f8ae2e974221..52aadd6d72a8 100644 --- a/drivers/pinctrl/pinctrl-ocelot.c +++ b/drivers/pinctrl/pinctrl-ocelot.c @@ -1776,12 +1776,6 @@ static int ocelot_gpio_get_direction(struct gpio_chip *chip, return GPIO_LINE_DIRECTION_IN; } -static int ocelot_gpio_direction_input(struct gpio_chip *chip, - unsigned int offset) -{ - return pinctrl_gpio_direction_input(chip->base + offset); -} - static int ocelot_gpio_direction_output(struct gpio_chip *chip, unsigned int offset, int value) { @@ -1795,7 +1789,7 @@ static int ocelot_gpio_direction_output(struct gpio_chip *chip, regmap_write(info->map, REG(OCELOT_GPIO_OUT_CLR, info, offset), pin); - return pinctrl_gpio_direction_output(chip->base + offset); + return pinctrl_gpio_direction_output(chip, offset); } static const struct gpio_chip ocelot_gpiolib_chip = { @@ -1804,7 +1798,7 @@ 
static const struct gpio_chip ocelot_gpiolib_chip = { .set = ocelot_gpio_set, .get = ocelot_gpio_get, .get_direction = ocelot_gpio_get_direction, - .direction_input = ocelot_gpio_direction_input, + .direction_input = pinctrl_gpio_direction_input, .direction_output = ocelot_gpio_direction_output, .owner = THIS_MODULE, }; diff --git a/drivers/pinctrl/pinctrl-rk805.c b/drivers/pinctrl/pinctrl-rk805.c index 2639a9ee82cd..56d916f2cee6 100644 --- a/drivers/pinctrl/pinctrl-rk805.c +++ b/drivers/pinctrl/pinctrl-rk805.c @@ -286,17 +286,11 @@ static void rk805_gpio_set(struct gpio_chip *chip, offset, value); } -static int rk805_gpio_direction_input(struct gpio_chip *chip, - unsigned int offset) -{ - return pinctrl_gpio_direction_input(chip->base + offset); -} - static int rk805_gpio_direction_output(struct gpio_chip *chip, unsigned int offset, int value) { rk805_gpio_set(chip, offset, value); - return pinctrl_gpio_direction_output(chip->base + offset); + return pinctrl_gpio_direction_output(chip, offset); } static int rk805_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) @@ -330,7 +324,7 @@ static const struct gpio_chip rk805_gpio_chip = { .get_direction = rk805_gpio_get_direction, .get = rk805_gpio_get, .set = rk805_gpio_set, - .direction_input = rk805_gpio_direction_input, + .direction_input = pinctrl_gpio_direction_input, .direction_output = rk805_gpio_direction_output, .can_sleep = true, .base = -1, diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index c1f36b164ea5..1485573b523c 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -717,20 +717,13 @@ static void st_gpio_set(struct gpio_chip *chip, unsigned offset, int value) __st_gpio_set(bank, offset, value); } -static int st_gpio_direction_input(struct gpio_chip *chip, unsigned offset) -{ - pinctrl_gpio_direction_input(chip->base + offset); - - return 0; -} - static int st_gpio_direction_output(struct gpio_chip *chip, unsigned offset, int value) { struct st_gpio_bank *bank = gpiochip_get_data(chip); __st_gpio_set(bank, offset, value); - pinctrl_gpio_direction_output(chip->base + offset); + pinctrl_gpio_direction_output(chip, offset); return 0; } @@ -1330,7 +1323,7 @@ static int st_gpio_irq_request_resources(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); - st_gpio_direction_input(gc, d->hwirq); + pinctrl_gpio_direction_input(gc, d->hwirq); return gpiochip_reqres_irq(gc, d->hwirq); } @@ -1488,7 +1481,7 @@ static const struct gpio_chip st_gpio_template = { .free = gpiochip_generic_free, .get = st_gpio_get, .set = st_gpio_set, - .direction_input = st_gpio_direction_input, + .direction_input = pinctrl_gpio_direction_input, .direction_output = st_gpio_direction_output, .get_direction = st_gpio_get_direction, .ngpio = ST_GPIO_PINS_PER_BANK, diff --git a/drivers/pinctrl/renesas/gpio.c b/drivers/pinctrl/renesas/gpio.c index 5758daf94fe2..a5136dacaaf2 100644 --- a/drivers/pinctrl/renesas/gpio.c +++ b/drivers/pinctrl/renesas/gpio.c @@ -135,12 +135,12 @@ static int gpio_pin_request(struct gpio_chip *gc, unsigned offset) if (idx < 0 || pfc->info->pins[idx].enum_id == 0) return -EINVAL; - return pinctrl_gpio_request(gc->base + offset); + return pinctrl_gpio_request(gc, offset); } static void gpio_pin_free(struct gpio_chip *gc, unsigned offset) { - return pinctrl_gpio_free(gc->base + offset); + return pinctrl_gpio_free(gc, offset); } static void gpio_pin_set_value(struct sh_pfc_chip *chip, unsigned offset, @@ -164,7 +164,7 @@ static void gpio_pin_set_value(struct 
sh_pfc_chip *chip, unsigned offset, static int gpio_pin_direction_input(struct gpio_chip *gc, unsigned offset) { - return pinctrl_gpio_direction_input(gc->base + offset); + return pinctrl_gpio_direction_input(gc, offset); } static int gpio_pin_direction_output(struct gpio_chip *gc, unsigned offset, @@ -172,7 +172,7 @@ static int gpio_pin_direction_output(struct gpio_chip *gc, unsigned offset, { gpio_pin_set_value(gpiochip_get_data(gc), offset, value); - return pinctrl_gpio_direction_output(gc->base + offset); + return pinctrl_gpio_direction_output(gc, offset); } static int gpio_pin_get(struct gpio_chip *gc, unsigned offset) diff --git a/drivers/pinctrl/renesas/pinctrl-rzg2l.c b/drivers/pinctrl/renesas/pinctrl-rzg2l.c index c7c6d912a975..9de350ad7e7d 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzg2l.c +++ b/drivers/pinctrl/renesas/pinctrl-rzg2l.c @@ -1056,7 +1056,7 @@ static int rzg2l_gpio_request(struct gpio_chip *chip, unsigned int offset) if (ret) return ret; - ret = pinctrl_gpio_request(chip->base + offset); + ret = pinctrl_gpio_request(chip, offset); if (ret) return ret; @@ -1181,7 +1181,7 @@ static void rzg2l_gpio_free(struct gpio_chip *chip, unsigned int offset) { unsigned int virq; - pinctrl_gpio_free(chip->base + offset); + pinctrl_gpio_free(chip, offset); virq = irq_find_mapping(chip->irq.domain, offset); if (virq) diff --git a/drivers/pinctrl/renesas/pinctrl-rzv2m.c b/drivers/pinctrl/renesas/pinctrl-rzv2m.c index 52aeafaba4b6..21d7d5ac8c4a 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzv2m.c +++ b/drivers/pinctrl/renesas/pinctrl-rzv2m.c @@ -754,7 +754,7 @@ static int rzv2m_gpio_request(struct gpio_chip *chip, unsigned int offset) u8 bit = RZV2M_PIN_ID_TO_PIN(offset); int ret; - ret = pinctrl_gpio_request(chip->base + offset); + ret = pinctrl_gpio_request(chip, offset); if (ret) return ret; @@ -832,7 +832,7 @@ static int rzv2m_gpio_get(struct gpio_chip *chip, unsigned int offset) static void rzv2m_gpio_free(struct gpio_chip *chip, unsigned int offset) { - pinctrl_gpio_free(chip->base + offset); + pinctrl_gpio_free(chip, offset); /* * Set the GPIO as an input to ensure that the next GPIO request won't diff --git a/drivers/pinctrl/spear/pinctrl-plgpio.c b/drivers/pinctrl/spear/pinctrl-plgpio.c index 722681e0b89b..a05570c7d833 100644 --- a/drivers/pinctrl/spear/pinctrl-plgpio.c +++ b/drivers/pinctrl/spear/pinctrl-plgpio.c @@ -204,14 +204,13 @@ static void plgpio_set_value(struct gpio_chip *chip, unsigned offset, int value) static int plgpio_request(struct gpio_chip *chip, unsigned offset) { struct plgpio *plgpio = gpiochip_get_data(chip); - int gpio = chip->base + offset; unsigned long flags; int ret = 0; if (offset >= chip->ngpio) return -EINVAL; - ret = pinctrl_gpio_request(gpio); + ret = pinctrl_gpio_request(chip, offset); if (ret) return ret; @@ -249,14 +248,13 @@ err1: if (!IS_ERR(plgpio->clk)) clk_disable(plgpio->clk); err0: - pinctrl_gpio_free(gpio); + pinctrl_gpio_free(chip, offset); return ret; } static void plgpio_free(struct gpio_chip *chip, unsigned offset) { struct plgpio *plgpio = gpiochip_get_data(chip); - int gpio = chip->base + offset; unsigned long flags; if (offset >= chip->ngpio) @@ -280,7 +278,7 @@ disable_clk: if (!IS_ERR(plgpio->clk)) clk_disable(plgpio->clk); - pinctrl_gpio_free(gpio); + pinctrl_gpio_free(chip, offset); } /* PLGPIO IRQ */ diff --git a/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c b/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c index 530fe340a9a1..ea70b8c61679 100644 --- a/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c +++ 
b/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c @@ -916,16 +916,6 @@ static struct pinctrl_desc starfive_desc = { .custom_conf_items = starfive_pinconf_custom_conf_items, }; -static int starfive_gpio_request(struct gpio_chip *gc, unsigned int gpio) -{ - return pinctrl_gpio_request(gc->base + gpio); -} - -static void starfive_gpio_free(struct gpio_chip *gc, unsigned int gpio) -{ - pinctrl_gpio_free(gc->base + gpio); -} - static int starfive_gpio_get_direction(struct gpio_chip *gc, unsigned int gpio) { struct starfive_pinctrl *sfp = container_of(gc, struct starfive_pinctrl, gc); @@ -1309,8 +1299,8 @@ static int starfive_probe(struct platform_device *pdev) sfp->gc.label = dev_name(dev); sfp->gc.owner = THIS_MODULE; - sfp->gc.request = starfive_gpio_request; - sfp->gc.free = starfive_gpio_free; + sfp->gc.request = pinctrl_gpio_request; + sfp->gc.free = pinctrl_gpio_free; sfp->gc.get_direction = starfive_gpio_get_direction; sfp->gc.direction_input = starfive_gpio_direction_input; sfp->gc.direction_output = starfive_gpio_direction_output; diff --git a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c index 640f827a9b2c..9d71e8c13310 100644 --- a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c +++ b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c @@ -545,16 +545,6 @@ static const struct pinconf_ops jh7110_pinconf_ops = { .is_generic = true, }; -static int jh7110_gpio_request(struct gpio_chip *gc, unsigned int gpio) -{ - return pinctrl_gpio_request(gc->base + gpio); -} - -static void jh7110_gpio_free(struct gpio_chip *gc, unsigned int gpio) -{ - pinctrl_gpio_free(gc->base + gpio); -} - static int jh7110_gpio_get_direction(struct gpio_chip *gc, unsigned int gpio) { @@ -940,8 +930,8 @@ int jh7110_pinctrl_probe(struct platform_device *pdev) sfp->gc.label = dev_name(dev); sfp->gc.owner = THIS_MODULE; - sfp->gc.request = jh7110_gpio_request; - sfp->gc.free = jh7110_gpio_free; + sfp->gc.request = pinctrl_gpio_request; + sfp->gc.free = pinctrl_gpio_free; sfp->gc.get_direction = jh7110_gpio_get_direction; sfp->gc.direction_input = jh7110_gpio_direction_input; sfp->gc.direction_output = jh7110_gpio_direction_output; diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index a73385a431de..64e8201c7eac 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -217,12 +217,7 @@ static int stm32_gpio_request(struct gpio_chip *chip, unsigned offset) return -EINVAL; } - return pinctrl_gpio_request(chip->base + offset); -} - -static void stm32_gpio_free(struct gpio_chip *chip, unsigned offset) -{ - pinctrl_gpio_free(chip->base + offset); + return pinctrl_gpio_request(chip, offset); } static int stm32_gpio_get(struct gpio_chip *chip, unsigned offset) @@ -239,18 +234,13 @@ static void stm32_gpio_set(struct gpio_chip *chip, unsigned offset, int value) __stm32_gpio_set(bank, offset, value); } -static int stm32_gpio_direction_input(struct gpio_chip *chip, unsigned offset) -{ - return pinctrl_gpio_direction_input(chip->base + offset); -} - static int stm32_gpio_direction_output(struct gpio_chip *chip, unsigned offset, int value) { struct stm32_gpio_bank *bank = gpiochip_get_data(chip); __stm32_gpio_set(bank, offset, value); - pinctrl_gpio_direction_output(chip->base + offset); + pinctrl_gpio_direction_output(chip, offset); return 0; } @@ -316,10 +306,10 @@ static int stm32_gpio_init_valid_mask(struct gpio_chip *chip, static const struct gpio_chip stm32_gpio_template = { .request 
= stm32_gpio_request, - .free = stm32_gpio_free, + .free = pinctrl_gpio_free, .get = stm32_gpio_get, .set = stm32_gpio_set, - .direction_input = stm32_gpio_direction_input, + .direction_input = pinctrl_gpio_direction_input, .direction_output = stm32_gpio_direction_output, .to_irq = stm32_gpio_to_irq, .get_direction = stm32_gpio_get_direction, @@ -381,7 +371,7 @@ static int stm32_gpio_irq_request_resources(struct irq_data *irq_data) struct stm32_pinctrl *pctl = dev_get_drvdata(bank->gpio_chip.parent); int ret; - ret = stm32_gpio_direction_input(&bank->gpio_chip, irq_data->hwirq); + ret = pinctrl_gpio_direction_input(&bank->gpio_chip, irq_data->hwirq); if (ret) return ret; diff --git a/drivers/pinctrl/vt8500/pinctrl-wmt.c b/drivers/pinctrl/vt8500/pinctrl-wmt.c index 6fac30de1c6a..fce92111a32e 100644 --- a/drivers/pinctrl/vt8500/pinctrl-wmt.c +++ b/drivers/pinctrl/vt8500/pinctrl-wmt.c @@ -526,16 +526,11 @@ static void wmt_gpio_set_value(struct gpio_chip *chip, unsigned offset, wmt_clearbits(data, reg_data_out, BIT(bit)); } -static int wmt_gpio_direction_input(struct gpio_chip *chip, unsigned offset) -{ - return pinctrl_gpio_direction_input(chip->base + offset); -} - static int wmt_gpio_direction_output(struct gpio_chip *chip, unsigned offset, int value) { wmt_gpio_set_value(chip, offset, value); - return pinctrl_gpio_direction_output(chip->base + offset); + return pinctrl_gpio_direction_output(chip, offset); } static const struct gpio_chip wmt_gpio_chip = { @@ -544,7 +539,7 @@ static const struct gpio_chip wmt_gpio_chip = { .request = gpiochip_generic_request, .free = gpiochip_generic_free, .get_direction = wmt_gpio_get_direction, - .direction_input = wmt_gpio_direction_input, + .direction_input = pinctrl_gpio_direction_input, .direction_output = wmt_gpio_direction_output, .get = wmt_gpio_get_value, .set = wmt_gpio_set_value, diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 282cd7d24077..3f7a74788802 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -108,6 +108,7 @@ int ptp_open(struct posix_clock_context *pccontext, fmode_t fmode) container_of(pccontext->clk, struct ptp_clock, clock); struct timestamp_event_queue *queue; char debugfsname[32]; + unsigned long flags; queue = kzalloc(sizeof(*queue), GFP_KERNEL); if (!queue) @@ -119,7 +120,9 @@ int ptp_open(struct posix_clock_context *pccontext, fmode_t fmode) } bitmap_set(queue->mask, 0, PTP_MAX_CHANNELS); spin_lock_init(&queue->lock); + spin_lock_irqsave(&ptp->tsevqs_lock, flags); list_add_tail(&queue->qlist, &ptp->tsevqs); + spin_unlock_irqrestore(&ptp->tsevqs_lock, flags); pccontext->private_clkdata = queue; /* Debugfs contents */ @@ -139,16 +142,16 @@ int ptp_release(struct posix_clock_context *pccontext) { struct timestamp_event_queue *queue = pccontext->private_clkdata; unsigned long flags; + struct ptp_clock *ptp = + container_of(pccontext->clk, struct ptp_clock, clock); - if (queue) { - debugfs_remove(queue->debugfs_instance); - pccontext->private_clkdata = NULL; - spin_lock_irqsave(&queue->lock, flags); - list_del(&queue->qlist); - spin_unlock_irqrestore(&queue->lock, flags); - bitmap_free(queue->mask); - kfree(queue); - } + debugfs_remove(queue->debugfs_instance); + pccontext->private_clkdata = NULL; + spin_lock_irqsave(&ptp->tsevqs_lock, flags); + list_del(&queue->qlist); + spin_unlock_irqrestore(&ptp->tsevqs_lock, flags); + bitmap_free(queue->mask); + kfree(queue); return 0; } @@ -585,7 +588,5 @@ ssize_t ptp_read(struct posix_clock_context *pccontext, uint rdflags, free_event: 
 	kfree(event);
 exit:
-	if (result < 0)
-		ptp_release(pccontext);
 	return result;
 }
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index 3d1b0a97301c..3134568af622 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -179,11 +179,11 @@ static void ptp_clock_release(struct device *dev)
 	mutex_destroy(&ptp->pincfg_mux);
 	mutex_destroy(&ptp->n_vclocks_mux);
 	/* Delete first entry */
+	spin_lock_irqsave(&ptp->tsevqs_lock, flags);
 	tsevq = list_first_entry(&ptp->tsevqs, struct timestamp_event_queue,
 				 qlist);
-	spin_lock_irqsave(&tsevq->lock, flags);
 	list_del(&tsevq->qlist);
-	spin_unlock_irqrestore(&tsevq->lock, flags);
+	spin_unlock_irqrestore(&ptp->tsevqs_lock, flags);
 	bitmap_free(tsevq->mask);
 	kfree(tsevq);
 	debugfs_remove(ptp->debugfs_root);
@@ -247,6 +247,7 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
 	if (!queue)
 		goto no_memory_queue;
 	list_add_tail(&queue->qlist, &ptp->tsevqs);
+	spin_lock_init(&ptp->tsevqs_lock);
 	queue->mask = bitmap_alloc(PTP_MAX_CHANNELS, GFP_KERNEL);
 	if (!queue->mask)
 		goto no_memory_bitmap;
@@ -407,6 +408,7 @@ void ptp_clock_event(struct ptp_clock *ptp, struct ptp_clock_event *event)
 {
 	struct timestamp_event_queue *tsevq;
 	struct pps_event_time evt;
+	unsigned long flags;
 
 	switch (event->type) {
 
@@ -415,10 +417,12 @@ void ptp_clock_event(struct ptp_clock *ptp, struct ptp_clock_event *event)
 
 	case PTP_CLOCK_EXTTS:
 		/* Enqueue timestamp on selected queues */
+		spin_lock_irqsave(&ptp->tsevqs_lock, flags);
 		list_for_each_entry(tsevq, &ptp->tsevqs, qlist) {
 			if (test_bit((unsigned int)event->index, tsevq->mask))
 				enqueue_external_timestamp(tsevq, event);
 		}
+		spin_unlock_irqrestore(&ptp->tsevqs_lock, flags);
 		wake_up_interruptible(&ptp->tsev_wq);
 		break;
diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h
index 52f87e394aa6..35fde0a05746 100644
--- a/drivers/ptp/ptp_private.h
+++ b/drivers/ptp/ptp_private.h
@@ -44,6 +44,7 @@ struct ptp_clock {
 	struct pps_device *pps_source;
 	long dialed_frequency; /* remembers the frequency adjustment */
 	struct list_head tsevqs; /* timestamp fifo list */
+	spinlock_t tsevqs_lock; /* protects tsevqs from concurrent access */
 	struct mutex pincfg_mux; /* protect concurrent info->pin_config access */
 	wait_queue_head_t tsev_wq;
 	int defunct; /* tells readers to go away when clock is being removed */
diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
index 8ebcddf91f7b..4b956d661755 100644
--- a/drivers/pwm/Kconfig
+++ b/drivers/pwm/Kconfig
@@ -173,8 +173,8 @@ config PWM_CLPS711X
 	  will be called pwm-clps711x.
 
 config PWM_CRC
-	bool "Intel Crystalcove (CRC) PWM support"
-	depends on X86 && INTEL_SOC_PMIC
+	tristate "Intel Crystalcove (CRC) PWM support"
+	depends on INTEL_SOC_PMIC
 	help
 	  Generic PWM framework driver for Crystalcove (CRC) PMIC based PWM
 	  control.
@@ -186,9 +186,19 @@ config PWM_CROS_EC
 	  PWM driver for exposing a PWM attached to the ChromeOS Embedded
 	  Controller.
 
+config PWM_DWC_CORE
+	tristate
+	depends on HAS_IOMEM
+	help
+	  PWM driver for Synopsys DWC PWM Controller.
+
+	  To compile this driver as a module, build the dependencies as
+	  modules; this will be called pwm-dwc-core.
+
 config PWM_DWC
-	tristate "DesignWare PWM Controller"
-	depends on PCI
+	tristate "DesignWare PWM Controller (PCI bus)"
+	depends on HAS_IOMEM && PCI
+	select PWM_DWC_CORE
 	help
 	  PWM driver for Synopsys DWC PWM Controller attached to a PCI bus.
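The new PWM_DWC_CORE/PWM_DWC pair follows the usual core-plus-bus-glue split: the PCI module only discovers the device and hands the mapped registers to the shared core. Below is a minimal sketch of such glue in C, assuming the dwc_pwm_alloc() helper and struct dwc_pwm introduced further down in this patch; the probe name and BAR handling here are illustrative, not the exact upstream code.

/* Illustrative PCI glue on top of pwm-dwc-core; not the upstream probe. */
#include <linux/pci.h>
#include <linux/pwm.h>

#include "pwm-dwc.h"

static int dwc_pwm_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
{
	struct dwc_pwm *dwc;
	int ret;

	ret = pcim_enable_device(pci);		/* managed: undone on unbind */
	if (ret)
		return ret;

	ret = pcim_iomap_regions(pci, BIT(0), "pwm-dwc");	/* map BAR 0 */
	if (ret)
		return ret;

	dwc = dwc_pwm_alloc(&pci->dev);		/* fills in chip.ops and chip.npwm */
	if (!dwc)
		return -ENOMEM;

	dwc->base = pcim_iomap_table(pci)[0];

	return devm_pwmchip_add(&pci->dev, &dwc->chip);
}

Because everything here is device-managed, the glue needs no remove callback; the same devm pattern is what lets the driver conversions below drop their .remove_new implementations.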
@@ -407,7 +417,7 @@ config PWM_MEDIATEK
 
 config PWM_MICROCHIP_CORE
 	tristate "Microchip corePWM PWM support"
-	depends on SOC_MICROCHIP_POLARFIRE || COMPILE_TEST
+	depends on ARCH_MICROCHIP_POLARFIRE || COMPILE_TEST
 	depends on HAS_IOMEM && OF
 	help
 	  PWM driver for Microchip FPGA soft IP core.
diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile
index c822389c2a24..c5ec9e168ee7 100644
--- a/drivers/pwm/Makefile
+++ b/drivers/pwm/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_PWM_CLK)		+= pwm-clk.o
 obj-$(CONFIG_PWM_CLPS711X)	+= pwm-clps711x.o
 obj-$(CONFIG_PWM_CRC)		+= pwm-crc.o
 obj-$(CONFIG_PWM_CROS_EC)	+= pwm-cros-ec.o
+obj-$(CONFIG_PWM_DWC_CORE)	+= pwm-dwc-core.o
 obj-$(CONFIG_PWM_DWC)		+= pwm-dwc.o
 obj-$(CONFIG_PWM_EP93XX)	+= pwm-ep93xx.o
 obj-$(CONFIG_PWM_FSL_FTM)	+= pwm-fsl-ftm.o
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index dc66e3405bf5..29078486534d 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -89,13 +89,13 @@ static int pwm_device_request(struct pwm_device *pwm, const char *label)
 	if (test_bit(PWMF_REQUESTED, &pwm->flags))
 		return -EBUSY;
 
-	if (!try_module_get(pwm->chip->ops->owner))
+	if (!try_module_get(pwm->chip->owner))
 		return -ENODEV;
 
 	if (pwm->chip->ops->request) {
 		err = pwm->chip->ops->request(pwm->chip, pwm);
 		if (err) {
-			module_put(pwm->chip->ops->owner);
+			module_put(pwm->chip->owner);
 			return err;
 		}
 	}
@@ -208,36 +208,6 @@ static void of_pwmchip_remove(struct pwm_chip *chip)
 		of_node_put(chip->dev->of_node);
 }
 
-/**
- * pwm_set_chip_data() - set private chip data for a PWM
- * @pwm: PWM device
- * @data: pointer to chip-specific data
- *
- * Returns: 0 on success or a negative error code on failure.
- */
-int pwm_set_chip_data(struct pwm_device *pwm, void *data)
-{
-	if (!pwm)
-		return -EINVAL;
-
-	pwm->chip_data = data;
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(pwm_set_chip_data);
-
-/**
- * pwm_get_chip_data() - get private chip data for a PWM
- * @pwm: PWM device
- *
- * Returns: A pointer to the chip-private data for the PWM device.
- */
-void *pwm_get_chip_data(struct pwm_device *pwm)
-{
-	return pwm ? pwm->chip_data : NULL;
-}
-EXPORT_SYMBOL_GPL(pwm_get_chip_data);
-
 static bool pwm_ops_check(const struct pwm_chip *chip)
 {
 	const struct pwm_ops *ops = chip->ops;
@@ -253,14 +223,16 @@ static bool pwm_ops_check(const struct pwm_chip *chip)
 }
 
 /**
- * pwmchip_add() - register a new PWM chip
+ * __pwmchip_add() - register a new PWM chip
 * @chip: the PWM chip to add
+ * @owner: reference to the module providing the chip.
 *
- * Register a new PWM chip.
+ * Register a new PWM chip. @owner is supposed to be THIS_MODULE; use the
+ * pwmchip_add wrapper to do this right.
 *
 * Returns: 0 on success or a negative error code on failure.
*/ -int pwmchip_add(struct pwm_chip *chip) +int __pwmchip_add(struct pwm_chip *chip, struct module *owner) { struct pwm_device *pwm; unsigned int i; @@ -272,6 +244,8 @@ int pwmchip_add(struct pwm_chip *chip) if (!pwm_ops_check(chip)) return -EINVAL; + chip->owner = owner; + chip->pwms = kcalloc(chip->npwm, sizeof(*pwm), GFP_KERNEL); if (!chip->pwms) return -ENOMEM; @@ -306,7 +280,7 @@ int pwmchip_add(struct pwm_chip *chip) return 0; } -EXPORT_SYMBOL_GPL(pwmchip_add); +EXPORT_SYMBOL_GPL(__pwmchip_add); /** * pwmchip_remove() - remove a PWM chip @@ -338,17 +312,17 @@ static void devm_pwmchip_remove(void *data) pwmchip_remove(chip); } -int devm_pwmchip_add(struct device *dev, struct pwm_chip *chip) +int __devm_pwmchip_add(struct device *dev, struct pwm_chip *chip, struct module *owner) { int ret; - ret = pwmchip_add(chip); + ret = __pwmchip_add(chip, owner); if (ret) return ret; return devm_add_action_or_reset(dev, devm_pwmchip_remove, chip); } -EXPORT_SYMBOL_GPL(devm_pwmchip_add); +EXPORT_SYMBOL_GPL(__devm_pwmchip_add); /** * pwm_request_from_chip() - request a PWM device relative to a PWM chip @@ -976,10 +950,9 @@ void pwm_put(struct pwm_device *pwm) if (pwm->chip->ops->free) pwm->chip->ops->free(pwm->chip, pwm); - pwm_set_chip_data(pwm, NULL); pwm->label = NULL; - module_put(pwm->chip->ops->owner); + module_put(pwm->chip->owner); out: mutex_unlock(&pwm_lock); } diff --git a/drivers/pwm/pwm-ab8500.c b/drivers/pwm/pwm-ab8500.c index 583a7d69c741..670d33daea84 100644 --- a/drivers/pwm/pwm-ab8500.c +++ b/drivers/pwm/pwm-ab8500.c @@ -181,7 +181,6 @@ static int ab8500_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops ab8500_pwm_ops = { .apply = ab8500_pwm_apply, .get_state = ab8500_pwm_get_state, - .owner = THIS_MODULE, }; static int ab8500_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-apple.c b/drivers/pwm/pwm-apple.c index 8e7d67fb5fbe..4d755b628d9e 100644 --- a/drivers/pwm/pwm-apple.c +++ b/drivers/pwm/pwm-apple.c @@ -99,7 +99,6 @@ static int apple_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops apple_pwm_ops = { .apply = apple_pwm_apply, .get_state = apple_pwm_get_state, - .owner = THIS_MODULE, }; static int apple_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c index e271d920151e..07920e034757 100644 --- a/drivers/pwm/pwm-atmel-hlcdc.c +++ b/drivers/pwm/pwm-atmel-hlcdc.c @@ -170,7 +170,6 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops atmel_hlcdc_pwm_ops = { .apply = atmel_hlcdc_pwm_apply, - .owner = THIS_MODULE, }; static const struct atmel_hlcdc_pwm_errata atmel_hlcdc_pwm_at91sam9x5_errata = { diff --git a/drivers/pwm/pwm-atmel-tcb.c b/drivers/pwm/pwm-atmel-tcb.c index c00dd37c5fbd..98b33c016c3c 100644 --- a/drivers/pwm/pwm-atmel-tcb.c +++ b/drivers/pwm/pwm-atmel-tcb.c @@ -364,7 +364,6 @@ static const struct pwm_ops atmel_tcb_pwm_ops = { .request = atmel_tcb_pwm_request, .free = atmel_tcb_pwm_free, .apply = atmel_tcb_pwm_apply, - .owner = THIS_MODULE, }; static struct atmel_tcb_config tcb_rm9200_config = { diff --git a/drivers/pwm/pwm-atmel.c b/drivers/pwm/pwm-atmel.c index 1f73325d1bea..47bcc8a3bf9d 100644 --- a/drivers/pwm/pwm-atmel.c +++ b/drivers/pwm/pwm-atmel.c @@ -402,7 +402,6 @@ static int atmel_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops atmel_pwm_ops = { .apply = atmel_pwm_apply, .get_state = 
atmel_pwm_get_state, - .owner = THIS_MODULE, }; static const struct atmel_pwm_data atmel_sam9rl_pwm_data = { @@ -547,7 +546,7 @@ disable_clk: static struct platform_driver atmel_pwm_driver = { .driver = { .name = "atmel-pwm", - .of_match_table = of_match_ptr(atmel_pwm_dt_ids), + .of_match_table = atmel_pwm_dt_ids, }, .probe = atmel_pwm_probe, }; diff --git a/drivers/pwm/pwm-bcm-iproc.c b/drivers/pwm/pwm-bcm-iproc.c index 7d70b6f186a6..758254025683 100644 --- a/drivers/pwm/pwm-bcm-iproc.c +++ b/drivers/pwm/pwm-bcm-iproc.c @@ -183,7 +183,6 @@ static int iproc_pwmc_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops iproc_pwm_ops = { .apply = iproc_pwmc_apply, .get_state = iproc_pwmc_get_state, - .owner = THIS_MODULE, }; static int iproc_pwmc_probe(struct platform_device *pdev) @@ -207,18 +206,10 @@ static int iproc_pwmc_probe(struct platform_device *pdev) if (IS_ERR(ip->base)) return PTR_ERR(ip->base); - ip->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(ip->clk)) { - dev_err(&pdev->dev, "failed to get clock: %ld\n", - PTR_ERR(ip->clk)); - return PTR_ERR(ip->clk); - } - - ret = clk_prepare_enable(ip->clk); - if (ret < 0) { - dev_err(&pdev->dev, "failed to enable clock: %d\n", ret); - return ret; - } + ip->clk = devm_clk_get_enabled(&pdev->dev, NULL); + if (IS_ERR(ip->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(ip->clk), + "failed to get clock\n"); /* Set full drive and normal polarity for all channels */ value = readl(ip->base + IPROC_PWM_CTRL_OFFSET); @@ -230,22 +221,12 @@ static int iproc_pwmc_probe(struct platform_device *pdev) writel(value, ip->base + IPROC_PWM_CTRL_OFFSET); - ret = pwmchip_add(&ip->chip); - if (ret < 0) { - dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret); - clk_disable_unprepare(ip->clk); - } - - return ret; -} - -static void iproc_pwmc_remove(struct platform_device *pdev) -{ - struct iproc_pwmc *ip = platform_get_drvdata(pdev); + ret = devm_pwmchip_add(&pdev->dev, &ip->chip); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, + "failed to add PWM chip\n"); - pwmchip_remove(&ip->chip); - - clk_disable_unprepare(ip->clk); + return 0; } static const struct of_device_id bcm_iproc_pwmc_dt[] = { @@ -260,7 +241,6 @@ static struct platform_driver iproc_pwmc_driver = { .of_match_table = bcm_iproc_pwmc_dt, }, .probe = iproc_pwmc_probe, - .remove_new = iproc_pwmc_remove, }; module_platform_driver(iproc_pwmc_driver); diff --git a/drivers/pwm/pwm-bcm-kona.c b/drivers/pwm/pwm-bcm-kona.c index e5b00cc9f7a7..15d6ed03c3ce 100644 --- a/drivers/pwm/pwm-bcm-kona.c +++ b/drivers/pwm/pwm-bcm-kona.c @@ -269,7 +269,6 @@ static int kona_pwmc_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops kona_pwm_ops = { .apply = kona_pwmc_apply, - .owner = THIS_MODULE, }; static int kona_pwmc_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-bcm2835.c b/drivers/pwm/pwm-bcm2835.c index bdfc2a5ec0d6..9777babd5b95 100644 --- a/drivers/pwm/pwm-bcm2835.c +++ b/drivers/pwm/pwm-bcm2835.c @@ -129,7 +129,6 @@ static const struct pwm_ops bcm2835_pwm_ops = { .request = bcm2835_pwm_request, .free = bcm2835_pwm_free, .apply = bcm2835_pwm_apply, - .owner = THIS_MODULE, }; static int bcm2835_pwm_probe(struct platform_device *pdev) @@ -147,41 +146,42 @@ static int bcm2835_pwm_probe(struct platform_device *pdev) if (IS_ERR(pc->base)) return PTR_ERR(pc->base); - pc->clk = devm_clk_get(&pdev->dev, NULL); + pc->clk = devm_clk_get_enabled(&pdev->dev, NULL); if (IS_ERR(pc->clk)) return dev_err_probe(&pdev->dev, PTR_ERR(pc->clk), 
"clock not found\n"); - ret = clk_prepare_enable(pc->clk); - if (ret) - return ret; - pc->chip.dev = &pdev->dev; pc->chip.ops = &bcm2835_pwm_ops; pc->chip.npwm = 2; - platform_set_drvdata(pdev, pc); - - ret = pwmchip_add(&pc->chip); + ret = devm_pwmchip_add(&pdev->dev, &pc->chip); if (ret < 0) - goto add_fail; + return dev_err_probe(&pdev->dev, ret, + "failed to add pwmchip\n"); return 0; +} + +static int bcm2835_pwm_suspend(struct device *dev) +{ + struct bcm2835_pwm *pc = dev_get_drvdata(dev); -add_fail: clk_disable_unprepare(pc->clk); - return ret; + + return 0; } -static void bcm2835_pwm_remove(struct platform_device *pdev) +static int bcm2835_pwm_resume(struct device *dev) { - struct bcm2835_pwm *pc = platform_get_drvdata(pdev); - - pwmchip_remove(&pc->chip); + struct bcm2835_pwm *pc = dev_get_drvdata(dev); - clk_disable_unprepare(pc->clk); + return clk_prepare_enable(pc->clk); } +static DEFINE_SIMPLE_DEV_PM_OPS(bcm2835_pwm_pm_ops, bcm2835_pwm_suspend, + bcm2835_pwm_resume); + static const struct of_device_id bcm2835_pwm_of_match[] = { { .compatible = "brcm,bcm2835-pwm", }, { /* sentinel */ } @@ -192,9 +192,9 @@ static struct platform_driver bcm2835_pwm_driver = { .driver = { .name = "bcm2835-pwm", .of_match_table = bcm2835_pwm_of_match, + .pm = pm_ptr(&bcm2835_pwm_pm_ops), }, .probe = bcm2835_pwm_probe, - .remove_new = bcm2835_pwm_remove, }; module_platform_driver(bcm2835_pwm_driver); diff --git a/drivers/pwm/pwm-berlin.c b/drivers/pwm/pwm-berlin.c index 0971c666afd1..ba2d79991769 100644 --- a/drivers/pwm/pwm-berlin.c +++ b/drivers/pwm/pwm-berlin.c @@ -39,6 +39,8 @@ #define BERLIN_PWM_TCNT 0xc #define BERLIN_PWM_MAX_TCNT 65535 +#define BERLIN_PWM_NUMPWMS 4 + struct berlin_pwm_channel { u32 enable; u32 ctrl; @@ -50,6 +52,7 @@ struct berlin_pwm_chip { struct pwm_chip chip; struct clk *clk; void __iomem *base; + struct berlin_pwm_channel channel[BERLIN_PWM_NUMPWMS]; }; static inline struct berlin_pwm_chip *to_berlin_pwm_chip(struct pwm_chip *chip) @@ -70,24 +73,6 @@ static inline void berlin_pwm_writel(struct berlin_pwm_chip *bpc, writel_relaxed(value, bpc->base + channel * 0x10 + offset); } -static int berlin_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) -{ - struct berlin_pwm_channel *channel; - - channel = kzalloc(sizeof(*channel), GFP_KERNEL); - if (!channel) - return -ENOMEM; - - return pwm_set_chip_data(pwm, channel); -} - -static void berlin_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) -{ - struct berlin_pwm_channel *channel = pwm_get_chip_data(pwm); - - kfree(channel); -} - static int berlin_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, u64 duty_ns, u64 period_ns) { @@ -202,10 +187,7 @@ static int berlin_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, } static const struct pwm_ops berlin_pwm_ops = { - .request = berlin_pwm_request, - .free = berlin_pwm_free, .apply = berlin_pwm_apply, - .owner = THIS_MODULE, }; static const struct of_device_id berlin_pwm_match[] = { @@ -227,39 +209,23 @@ static int berlin_pwm_probe(struct platform_device *pdev) if (IS_ERR(bpc->base)) return PTR_ERR(bpc->base); - bpc->clk = devm_clk_get(&pdev->dev, NULL); + bpc->clk = devm_clk_get_enabled(&pdev->dev, NULL); if (IS_ERR(bpc->clk)) return PTR_ERR(bpc->clk); - ret = clk_prepare_enable(bpc->clk); - if (ret) - return ret; - bpc->chip.dev = &pdev->dev; bpc->chip.ops = &berlin_pwm_ops; - bpc->chip.npwm = 4; + bpc->chip.npwm = BERLIN_PWM_NUMPWMS; - ret = pwmchip_add(&bpc->chip); - if (ret < 0) { - dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret); - 
clk_disable_unprepare(bpc->clk); - return ret; - } + ret = devm_pwmchip_add(&pdev->dev, &bpc->chip); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n"); platform_set_drvdata(pdev, bpc); return 0; } -static void berlin_pwm_remove(struct platform_device *pdev) -{ - struct berlin_pwm_chip *bpc = platform_get_drvdata(pdev); - - pwmchip_remove(&bpc->chip); - - clk_disable_unprepare(bpc->clk); -} - #ifdef CONFIG_PM_SLEEP static int berlin_pwm_suspend(struct device *dev) { @@ -267,11 +233,7 @@ static int berlin_pwm_suspend(struct device *dev) unsigned int i; for (i = 0; i < bpc->chip.npwm; i++) { - struct berlin_pwm_channel *channel; - - channel = pwm_get_chip_data(&bpc->chip.pwms[i]); - if (!channel) - continue; + struct berlin_pwm_channel *channel = &bpc->channel[i]; channel->enable = berlin_pwm_readl(bpc, i, BERLIN_PWM_ENABLE); channel->ctrl = berlin_pwm_readl(bpc, i, BERLIN_PWM_CONTROL); @@ -295,11 +257,7 @@ static int berlin_pwm_resume(struct device *dev) return ret; for (i = 0; i < bpc->chip.npwm; i++) { - struct berlin_pwm_channel *channel; - - channel = pwm_get_chip_data(&bpc->chip.pwms[i]); - if (!channel) - continue; + struct berlin_pwm_channel *channel = &bpc->channel[i]; berlin_pwm_writel(bpc, i, channel->ctrl, BERLIN_PWM_CONTROL); berlin_pwm_writel(bpc, i, channel->duty, BERLIN_PWM_DUTY); @@ -316,7 +274,6 @@ static SIMPLE_DEV_PM_OPS(berlin_pwm_pm_ops, berlin_pwm_suspend, static struct platform_driver berlin_pwm_driver = { .probe = berlin_pwm_probe, - .remove_new = berlin_pwm_remove, .driver = { .name = "berlin-pwm", .of_match_table = berlin_pwm_match, diff --git a/drivers/pwm/pwm-brcmstb.c b/drivers/pwm/pwm-brcmstb.c index a3faa9a3de7c..b723c2d4f485 100644 --- a/drivers/pwm/pwm-brcmstb.c +++ b/drivers/pwm/pwm-brcmstb.c @@ -220,7 +220,6 @@ static int brcmstb_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops brcmstb_pwm_ops = { .apply = brcmstb_pwm_apply, - .owner = THIS_MODULE, }; static const struct of_device_id brcmstb_pwm_of_match[] = { @@ -238,17 +237,10 @@ static int brcmstb_pwm_probe(struct platform_device *pdev) if (!p) return -ENOMEM; - p->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(p->clk)) { - dev_err(&pdev->dev, "failed to obtain clock\n"); - return PTR_ERR(p->clk); - } - - ret = clk_prepare_enable(p->clk); - if (ret < 0) { - dev_err(&pdev->dev, "failed to enable clock: %d\n", ret); - return ret; - } + p->clk = devm_clk_get_enabled(&pdev->dev, NULL); + if (IS_ERR(p->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(p->clk), + "failed to obtain clock\n"); platform_set_drvdata(pdev, p); @@ -257,30 +249,14 @@ static int brcmstb_pwm_probe(struct platform_device *pdev) p->chip.npwm = 2; p->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(p->base)) { - ret = PTR_ERR(p->base); - goto out_clk; - } + if (IS_ERR(p->base)) + return PTR_ERR(p->base); - ret = pwmchip_add(&p->chip); - if (ret) { - dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret); - goto out_clk; - } + ret = devm_pwmchip_add(&pdev->dev, &p->chip); + if (ret) + return dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n"); return 0; - -out_clk: - clk_disable_unprepare(p->clk); - return ret; -} - -static void brcmstb_pwm_remove(struct platform_device *pdev) -{ - struct brcmstb_pwm *p = platform_get_drvdata(pdev); - - pwmchip_remove(&p->chip); - clk_disable_unprepare(p->clk); } #ifdef CONFIG_PM_SLEEP @@ -288,7 +264,7 @@ static int brcmstb_pwm_suspend(struct device *dev) { struct brcmstb_pwm *p = dev_get_drvdata(dev); - 
clk_disable(p->clk); + clk_disable_unprepare(p->clk); return 0; } @@ -297,9 +273,7 @@ static int brcmstb_pwm_resume(struct device *dev) { struct brcmstb_pwm *p = dev_get_drvdata(dev); - clk_enable(p->clk); - - return 0; + return clk_prepare_enable(p->clk); } #endif @@ -308,7 +282,6 @@ static SIMPLE_DEV_PM_OPS(brcmstb_pwm_pm_ops, brcmstb_pwm_suspend, static struct platform_driver brcmstb_pwm_driver = { .probe = brcmstb_pwm_probe, - .remove_new = brcmstb_pwm_remove, .driver = { .name = "pwm-brcmstb", .of_match_table = brcmstb_pwm_of_match, diff --git a/drivers/pwm/pwm-clk.c b/drivers/pwm/pwm-clk.c index 0ee4d2aee4df..9dd88b386907 100644 --- a/drivers/pwm/pwm-clk.c +++ b/drivers/pwm/pwm-clk.c @@ -77,7 +77,6 @@ static int pwm_clk_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops pwm_clk_ops = { .apply = pwm_clk_apply, - .owner = THIS_MODULE, }; static int pwm_clk_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-clps711x.c b/drivers/pwm/pwm-clps711x.c index b0d91142da8d..42179b3f7ec3 100644 --- a/drivers/pwm/pwm-clps711x.c +++ b/drivers/pwm/pwm-clps711x.c @@ -72,7 +72,6 @@ static int clps711x_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops clps711x_pwm_ops = { .request = clps711x_pwm_request, .apply = clps711x_pwm_apply, - .owner = THIS_MODULE, }; static struct pwm_device *clps711x_pwm_xlate(struct pwm_chip *chip, diff --git a/drivers/pwm/pwm-crc.c b/drivers/pwm/pwm-crc.c index b9f063dc6b5f..2b0b659eee97 100644 --- a/drivers/pwm/pwm-crc.c +++ b/drivers/pwm/pwm-crc.c @@ -184,5 +184,8 @@ static struct platform_driver crystalcove_pwm_driver = { .name = "crystal_cove_pwm", }, }; +module_platform_driver(crystalcove_pwm_driver); -builtin_platform_driver(crystalcove_pwm_driver); +MODULE_ALIAS("platform:crystal_cove_pwm"); +MODULE_DESCRIPTION("Intel Crystalcove (CRC) PWM support"); +MODULE_LICENSE("GPL"); diff --git a/drivers/pwm/pwm-cros-ec.c b/drivers/pwm/pwm-cros-ec.c index baaac0c33aa0..4fbd23e4ef69 100644 --- a/drivers/pwm/pwm-cros-ec.c +++ b/drivers/pwm/pwm-cros-ec.c @@ -22,12 +22,14 @@ * @ec: Pointer to EC device * @chip: PWM controller chip * @use_pwm_type: Use PWM types instead of generic channels + * @channel: array with per-channel data */ struct cros_ec_pwm_device { struct device *dev; struct cros_ec_device *ec; struct pwm_chip chip; bool use_pwm_type; + struct cros_ec_pwm *channel; }; /** @@ -43,26 +45,6 @@ static inline struct cros_ec_pwm_device *pwm_to_cros_ec_pwm(struct pwm_chip *chi return container_of(chip, struct cros_ec_pwm_device, chip); } -static int cros_ec_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) -{ - struct cros_ec_pwm *channel; - - channel = kzalloc(sizeof(*channel), GFP_KERNEL); - if (!channel) - return -ENOMEM; - - pwm_set_chip_data(pwm, channel); - - return 0; -} - -static void cros_ec_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) -{ - struct cros_ec_pwm *channel = pwm_get_chip_data(pwm); - - kfree(channel); -} - static int cros_ec_dt_type_to_pwm_type(u8 dt_index, u8 *pwm_type) { switch (dt_index) { @@ -158,7 +140,7 @@ static int cros_ec_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, const struct pwm_state *state) { struct cros_ec_pwm_device *ec_pwm = pwm_to_cros_ec_pwm(chip); - struct cros_ec_pwm *channel = pwm_get_chip_data(pwm); + struct cros_ec_pwm *channel = &ec_pwm->channel[pwm->hwpwm]; u16 duty_cycle; int ret; @@ -188,7 +170,7 @@ static int cros_ec_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state) { struct 
cros_ec_pwm_device *ec_pwm = pwm_to_cros_ec_pwm(chip);
-	struct cros_ec_pwm *channel = pwm_get_chip_data(pwm);
+	struct cros_ec_pwm *channel = &ec_pwm->channel[pwm->hwpwm];
 	int ret;
 
 	ret = cros_ec_pwm_get_duty(ec_pwm, pwm->hwpwm);
@@ -237,11 +219,8 @@ cros_ec_pwm_xlate(struct pwm_chip *chip, const struct of_phandle_args *args)
 }
 
 static const struct pwm_ops cros_ec_pwm_ops = {
-	.request = cros_ec_pwm_request,
-	.free = cros_ec_pwm_free,
 	.get_state = cros_ec_pwm_get_state,
 	.apply = cros_ec_pwm_apply,
-	.owner = THIS_MODULE,
 };
 
 /*
@@ -286,10 +265,8 @@ static int cros_ec_pwm_probe(struct platform_device *pdev)
 	struct pwm_chip *chip;
 	int ret;
 
-	if (!ec) {
-		dev_err(dev, "no parent EC device\n");
-		return -EINVAL;
-	}
+	if (!ec)
+		return dev_err_probe(dev, -EINVAL, "no parent EC device\n");
 
 	ec_pwm = devm_kzalloc(dev, sizeof(*ec_pwm), GFP_KERNEL);
 	if (!ec_pwm)
@@ -310,32 +287,23 @@ static int cros_ec_pwm_probe(struct platform_device *pdev)
 		chip->npwm = CROS_EC_PWM_DT_COUNT;
 	} else {
 		ret = cros_ec_num_pwms(ec_pwm);
-		if (ret < 0) {
-			dev_err(dev, "Couldn't find PWMs: %d\n", ret);
-			return ret;
-		}
+		if (ret < 0)
+			return dev_err_probe(dev, ret, "Couldn't find PWMs\n");
 		chip->npwm = ret;
 	}
 
-	dev_dbg(dev, "Probed %u PWMs\n", chip->npwm);
-
-	ret = pwmchip_add(chip);
-	if (ret < 0) {
-		dev_err(dev, "cannot register PWM: %d\n", ret);
-		return ret;
-	}
-
-	platform_set_drvdata(pdev, ec_pwm);
+	ec_pwm->channel = devm_kcalloc(dev, chip->npwm, sizeof(*ec_pwm->channel),
+				       GFP_KERNEL);
+	if (!ec_pwm->channel)
+		return -ENOMEM;
 
-	return ret;
-}
+	dev_dbg(dev, "Probed %u PWMs\n", chip->npwm);
 
-static void cros_ec_pwm_remove(struct platform_device *dev)
-{
-	struct cros_ec_pwm_device *ec_pwm = platform_get_drvdata(dev);
-	struct pwm_chip *chip = &ec_pwm->chip;
+	ret = devm_pwmchip_add(dev, chip);
+	if (ret < 0)
+		return dev_err_probe(dev, ret, "cannot register PWM\n");
 
-	pwmchip_remove(chip);
+	return 0;
 }
 
 #ifdef CONFIG_OF
@@ -349,7 +317,6 @@ MODULE_DEVICE_TABLE(of, cros_ec_pwm_of_match);
 
 static struct platform_driver cros_ec_pwm_driver = {
 	.probe = cros_ec_pwm_probe,
-	.remove_new = cros_ec_pwm_remove,
 	.driver = {
 		.name = "cros-ec-pwm",
 		.of_match_table = of_match_ptr(cros_ec_pwm_of_match),
diff --git a/drivers/pwm/pwm-dwc-core.c b/drivers/pwm/pwm-dwc-core.c
new file mode 100644
index 000000000000..ea63dd741f5c
--- /dev/null
+++ b/drivers/pwm/pwm-dwc-core.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DesignWare PWM Controller driver core
+ *
+ * Copyright (C) 2018-2020 Intel Corporation
+ *
+ * Author: Felipe Balbi (Intel)
+ * Author: Jarkko Nikula <jarkko.nikula@linux.intel.com>
+ * Author: Raymond Tan <raymond.tan@intel.com>
+ */
+
+#define DEFAULT_SYMBOL_NAMESPACE dwc_pwm
+
+#include <linux/bitops.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include <linux/pwm.h>
+
+#include "pwm-dwc.h"
+
+static void __dwc_pwm_set_enable(struct dwc_pwm *dwc, int pwm, int enabled)
+{
+	u32 reg;
+
+	reg = dwc_pwm_readl(dwc, DWC_TIM_CTRL(pwm));
+
+	if (enabled)
+		reg |= DWC_TIM_CTRL_EN;
+	else
+		reg &= ~DWC_TIM_CTRL_EN;
+
+	dwc_pwm_writel(dwc, reg, DWC_TIM_CTRL(pwm));
+}
+
+static int __dwc_pwm_configure_timer(struct dwc_pwm *dwc,
+				     struct pwm_device *pwm,
+				     const struct pwm_state *state)
+{
+	u64 tmp;
+	u32 ctrl;
+	u32 high;
+	u32 low;
+
+	/*
+	 * Calculate the width of the low and high periods in terms of input
+	 * clock periods and check that the results are within the HW limits,
+	 * between 1 and 2^32 periods.
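+	 * For example, with the default 10 ns input clock (dwc->clk_ns == 10)
+	 * a requested duty_cycle of 2500 ns maps to 250 input clock periods,
+	 * so a Load Count of 249 is programmed; anything below one clock
+	 * period or above 2^32 periods is rejected with -ERANGE.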
+	 */
+	tmp = DIV_ROUND_CLOSEST_ULL(state->duty_cycle, dwc->clk_ns);
+	if (tmp < 1 || tmp > (1ULL << 32))
+		return -ERANGE;
+	low = tmp - 1;
+
+	tmp = DIV_ROUND_CLOSEST_ULL(state->period - state->duty_cycle,
+				    dwc->clk_ns);
+	if (tmp < 1 || tmp > (1ULL << 32))
+		return -ERANGE;
+	high = tmp - 1;
+
+	/*
+	 * The specification says the timer usage flow is to disable the
+	 * timer, program it, then enable it. It also says Load Count is
+	 * loaded into the timer after it is enabled - either after a disable
+	 * or a reset. Based on measurements it also happens without a
+	 * disable whenever Load Count is updated. But follow the
+	 * specification.
+	 */
+	__dwc_pwm_set_enable(dwc, pwm->hwpwm, false);
+
+	/*
+	 * Write the Load Count and Load Count 2 registers. The former
+	 * defines the width of the low period and the latter the width of
+	 * the high period, in multiples of the input clock period:
+	 * Width = ((Count + 1) * input clock period).
+	 */
+	dwc_pwm_writel(dwc, low, DWC_TIM_LD_CNT(pwm->hwpwm));
+	dwc_pwm_writel(dwc, high, DWC_TIM_LD_CNT2(pwm->hwpwm));
+
+	/*
+	 * Set user-defined mode, so the timer reloads from the Load Count
+	 * registers when it counts down to 0.
+	 * Set PWM mode; it makes the output toggle, with the widths of the
+	 * low and high periods set by the Load Count registers.
+	 */
+	ctrl = DWC_TIM_CTRL_MODE_USER | DWC_TIM_CTRL_PWM;
+	dwc_pwm_writel(dwc, ctrl, DWC_TIM_CTRL(pwm->hwpwm));
+
+	/*
+	 * Enable the timer. The output starts from the low period.
+	 */
+	__dwc_pwm_set_enable(dwc, pwm->hwpwm, state->enabled);
+
+	return 0;
+}
+
+static int dwc_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+			 const struct pwm_state *state)
+{
+	struct dwc_pwm *dwc = to_dwc_pwm(chip);
+
+	if (state->polarity != PWM_POLARITY_INVERSED)
+		return -EINVAL;
+
+	if (state->enabled) {
+		if (!pwm->state.enabled)
+			pm_runtime_get_sync(chip->dev);
+		return __dwc_pwm_configure_timer(dwc, pwm, state);
+	} else {
+		if (pwm->state.enabled) {
+			__dwc_pwm_set_enable(dwc, pwm->hwpwm, false);
+			pm_runtime_put_sync(chip->dev);
+		}
+	}
+
+	return 0;
+}
+
+static int dwc_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+			     struct pwm_state *state)
+{
+	struct dwc_pwm *dwc = to_dwc_pwm(chip);
+	u64 duty, period;
+	u32 ctrl, ld, ld2;
+
+	pm_runtime_get_sync(chip->dev);
+
+	ctrl = dwc_pwm_readl(dwc, DWC_TIM_CTRL(pwm->hwpwm));
+	ld = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT(pwm->hwpwm));
+	ld2 = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT2(pwm->hwpwm));
+
+	state->enabled = !!(ctrl & DWC_TIM_CTRL_EN);
+
+	/*
+	 * If we're not in PWM mode, the output is technically a 50-50
+	 * square wave based on the timer load-count only.
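+	 * In that case the duty derived from Load Count covers half of the
+	 * cycle, which is why the period below is reported as twice the duty.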
+	 */
+	if (ctrl & DWC_TIM_CTRL_PWM) {
+		duty = (ld + 1) * dwc->clk_ns;
+		period = (ld2 + 1) * dwc->clk_ns;
+		period += duty;
+	} else {
+		duty = (ld + 1) * dwc->clk_ns;
+		period = duty * 2;
+	}
+
+	state->polarity = PWM_POLARITY_INVERSED;
+	state->period = period;
+	state->duty_cycle = duty;
+
+	pm_runtime_put_sync(chip->dev);
+
+	return 0;
+}
+
+static const struct pwm_ops dwc_pwm_ops = {
+	.apply = dwc_pwm_apply,
+	.get_state = dwc_pwm_get_state,
+};
+
+struct dwc_pwm *dwc_pwm_alloc(struct device *dev)
+{
+	struct dwc_pwm *dwc;
+
+	dwc = devm_kzalloc(dev, sizeof(*dwc), GFP_KERNEL);
+	if (!dwc)
+		return NULL;
+
+	dwc->clk_ns = 10;
+	dwc->chip.dev = dev;
+	dwc->chip.ops = &dwc_pwm_ops;
+	dwc->chip.npwm = DWC_TIMERS_TOTAL;
+
+	dev_set_drvdata(dev, dwc);
+	return dwc;
+}
+EXPORT_SYMBOL_GPL(dwc_pwm_alloc);
+
+MODULE_AUTHOR("Felipe Balbi (Intel)");
+MODULE_AUTHOR("Jarkko Nikula <jarkko.nikula@linux.intel.com>");
+MODULE_AUTHOR("Raymond Tan <raymond.tan@intel.com>");
+MODULE_DESCRIPTION("DesignWare PWM Controller");
+MODULE_LICENSE("GPL");
diff --git a/drivers/pwm/pwm-dwc.c b/drivers/pwm/pwm-dwc.c
index 3bbb26c862c3..bd9cadb497d7 100644
--- a/drivers/pwm/pwm-dwc.c
+++ b/drivers/pwm/pwm-dwc.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * DesignWare PWM Controller driver
+ * DesignWare PWM Controller driver (PCI part)
 *
 * Copyright (C) 2018-2020 Intel Corporation
 *
@@ -13,6 +13,8 @@
 * periods are one or more input clock periods long.
 */
 
+#define DEFAULT_SYMBOL_NAMESPACE dwc_pwm
+
 #include <linux/bitops.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
@@ -21,198 +23,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/pwm.h>
 
-#define DWC_TIM_LD_CNT(n)	((n) * 0x14)
-#define DWC_TIM_LD_CNT2(n)	(((n) * 4) + 0xb0)
-#define DWC_TIM_CUR_VAL(n)	(((n) * 0x14) + 0x04)
-#define DWC_TIM_CTRL(n)		(((n) * 0x14) + 0x08)
-#define DWC_TIM_EOI(n)		(((n) * 0x14) + 0x0c)
-#define DWC_TIM_INT_STS(n)	(((n) * 0x14) + 0x10)
-
-#define DWC_TIMERS_INT_STS	0xa0
-#define DWC_TIMERS_EOI		0xa4
-#define DWC_TIMERS_RAW_INT_STS	0xa8
-#define DWC_TIMERS_COMP_VERSION	0xac
-
-#define DWC_TIMERS_TOTAL	8
-#define DWC_CLK_PERIOD_NS	10
-
-/* Timer Control Register */
-#define DWC_TIM_CTRL_EN		BIT(0)
-#define DWC_TIM_CTRL_MODE	BIT(1)
-#define DWC_TIM_CTRL_MODE_FREE	(0 << 1)
-#define DWC_TIM_CTRL_MODE_USER	(1 << 1)
-#define DWC_TIM_CTRL_INT_MASK	BIT(2)
-#define DWC_TIM_CTRL_PWM	BIT(3)
-
-struct dwc_pwm_ctx {
-	u32 cnt;
-	u32 cnt2;
-	u32 ctrl;
-};
-
-struct dwc_pwm {
-	struct pwm_chip chip;
-	void __iomem *base;
-	struct dwc_pwm_ctx ctx[DWC_TIMERS_TOTAL];
-};
-#define to_dwc_pwm(p)	(container_of((p), struct dwc_pwm, chip))
-
-static inline u32 dwc_pwm_readl(struct dwc_pwm *dwc, u32 offset)
-{
-	return readl(dwc->base + offset);
-}
-
-static inline void dwc_pwm_writel(struct dwc_pwm *dwc, u32 value, u32 offset)
-{
-	writel(value, dwc->base + offset);
-}
-
-static void __dwc_pwm_set_enable(struct dwc_pwm *dwc, int pwm, int enabled)
-{
-	u32 reg;
-
-	reg = dwc_pwm_readl(dwc, DWC_TIM_CTRL(pwm));
-
-	if (enabled)
-		reg |= DWC_TIM_CTRL_EN;
-	else
-		reg &= ~DWC_TIM_CTRL_EN;
-
-	dwc_pwm_writel(dwc, reg, DWC_TIM_CTRL(pwm));
-}
-
-static int __dwc_pwm_configure_timer(struct dwc_pwm *dwc,
-				     struct pwm_device *pwm,
-				     const struct pwm_state *state)
-{
-	u64 tmp;
-	u32 ctrl;
-	u32 high;
-	u32 low;
-
-	/*
-	 * Calculate width of low and high period in terms of input clock
-	 * periods and check are the result within HW limits between 1 and
-	 * 2^32 periods.
- */ - tmp = DIV_ROUND_CLOSEST_ULL(state->duty_cycle, DWC_CLK_PERIOD_NS); - if (tmp < 1 || tmp > (1ULL << 32)) - return -ERANGE; - low = tmp - 1; - - tmp = DIV_ROUND_CLOSEST_ULL(state->period - state->duty_cycle, - DWC_CLK_PERIOD_NS); - if (tmp < 1 || tmp > (1ULL << 32)) - return -ERANGE; - high = tmp - 1; - - /* - * Specification says timer usage flow is to disable timer, then - * program it followed by enable. It also says Load Count is loaded - * into timer after it is enabled - either after a disable or - * a reset. Based on measurements it happens also without disable - * whenever Load Count is updated. But follow the specification. - */ - __dwc_pwm_set_enable(dwc, pwm->hwpwm, false); - - /* - * Write Load Count and Load Count 2 registers. Former defines the - * width of low period and latter the width of high period in terms - * multiple of input clock periods: - * Width = ((Count + 1) * input clock period). - */ - dwc_pwm_writel(dwc, low, DWC_TIM_LD_CNT(pwm->hwpwm)); - dwc_pwm_writel(dwc, high, DWC_TIM_LD_CNT2(pwm->hwpwm)); - - /* - * Set user-defined mode, timer reloads from Load Count registers - * when it counts down to 0. - * Set PWM mode, it makes output to toggle and width of low and high - * periods are set by Load Count registers. - */ - ctrl = DWC_TIM_CTRL_MODE_USER | DWC_TIM_CTRL_PWM; - dwc_pwm_writel(dwc, ctrl, DWC_TIM_CTRL(pwm->hwpwm)); - - /* - * Enable timer. Output starts from low period. - */ - __dwc_pwm_set_enable(dwc, pwm->hwpwm, state->enabled); - - return 0; -} - -static int dwc_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - const struct pwm_state *state) -{ - struct dwc_pwm *dwc = to_dwc_pwm(chip); - - if (state->polarity != PWM_POLARITY_INVERSED) - return -EINVAL; - - if (state->enabled) { - if (!pwm->state.enabled) - pm_runtime_get_sync(chip->dev); - return __dwc_pwm_configure_timer(dwc, pwm, state); - } else { - if (pwm->state.enabled) { - __dwc_pwm_set_enable(dwc, pwm->hwpwm, false); - pm_runtime_put_sync(chip->dev); - } - } - - return 0; -} - -static int dwc_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) -{ - struct dwc_pwm *dwc = to_dwc_pwm(chip); - u64 duty, period; - - pm_runtime_get_sync(chip->dev); - - state->enabled = !!(dwc_pwm_readl(dwc, - DWC_TIM_CTRL(pwm->hwpwm)) & DWC_TIM_CTRL_EN); - - duty = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT(pwm->hwpwm)); - duty += 1; - duty *= DWC_CLK_PERIOD_NS; - state->duty_cycle = duty; - - period = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT2(pwm->hwpwm)); - period += 1; - period *= DWC_CLK_PERIOD_NS; - period += duty; - state->period = period; - - state->polarity = PWM_POLARITY_INVERSED; - - pm_runtime_put_sync(chip->dev); - - return 0; -} - -static const struct pwm_ops dwc_pwm_ops = { - .apply = dwc_pwm_apply, - .get_state = dwc_pwm_get_state, - .owner = THIS_MODULE, -}; - -static struct dwc_pwm *dwc_pwm_alloc(struct device *dev) -{ - struct dwc_pwm *dwc; - - dwc = devm_kzalloc(dev, sizeof(*dwc), GFP_KERNEL); - if (!dwc) - return NULL; - - dwc->chip.dev = dev; - dwc->chip.ops = &dwc_pwm_ops; - dwc->chip.npwm = DWC_TIMERS_TOTAL; - - dev_set_drvdata(dev, dwc); - return dwc; -} +#include "pwm-dwc.h" static int dwc_pwm_probe(struct pci_dev *pci, const struct pci_device_id *id) { diff --git a/drivers/pwm/pwm-dwc.h b/drivers/pwm/pwm-dwc.h new file mode 100644 index 000000000000..64795247c54c --- /dev/null +++ b/drivers/pwm/pwm-dwc.h @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DesignWare PWM Controller driver + * + * Copyright (C) 2018-2020 Intel Corporation + * + * 
Author: Felipe Balbi (Intel) + * Author: Jarkko Nikula <jarkko.nikula@linux.intel.com> + * Author: Raymond Tan <raymond.tan@intel.com> + */ + +MODULE_IMPORT_NS(dwc_pwm); + +#define DWC_TIM_LD_CNT(n) ((n) * 0x14) +#define DWC_TIM_LD_CNT2(n) (((n) * 4) + 0xb0) +#define DWC_TIM_CUR_VAL(n) (((n) * 0x14) + 0x04) +#define DWC_TIM_CTRL(n) (((n) * 0x14) + 0x08) +#define DWC_TIM_EOI(n) (((n) * 0x14) + 0x0c) +#define DWC_TIM_INT_STS(n) (((n) * 0x14) + 0x10) + +#define DWC_TIMERS_INT_STS 0xa0 +#define DWC_TIMERS_EOI 0xa4 +#define DWC_TIMERS_RAW_INT_STS 0xa8 +#define DWC_TIMERS_COMP_VERSION 0xac + +#define DWC_TIMERS_TOTAL 8 + +/* Timer Control Register */ +#define DWC_TIM_CTRL_EN BIT(0) +#define DWC_TIM_CTRL_MODE BIT(1) +#define DWC_TIM_CTRL_MODE_FREE (0 << 1) +#define DWC_TIM_CTRL_MODE_USER (1 << 1) +#define DWC_TIM_CTRL_INT_MASK BIT(2) +#define DWC_TIM_CTRL_PWM BIT(3) + +struct dwc_pwm_ctx { + u32 cnt; + u32 cnt2; + u32 ctrl; +}; + +struct dwc_pwm { + struct pwm_chip chip; + void __iomem *base; + unsigned int clk_ns; + struct dwc_pwm_ctx ctx[DWC_TIMERS_TOTAL]; +}; +#define to_dwc_pwm(p) (container_of((p), struct dwc_pwm, chip)) + +static inline u32 dwc_pwm_readl(struct dwc_pwm *dwc, u32 offset) +{ + return readl(dwc->base + offset); +} + +static inline void dwc_pwm_writel(struct dwc_pwm *dwc, u32 value, u32 offset) +{ + writel(value, dwc->base + offset); +} + +extern struct dwc_pwm *dwc_pwm_alloc(struct device *dev); diff --git a/drivers/pwm/pwm-ep93xx.c b/drivers/pwm/pwm-ep93xx.c index c45a75e65c86..51e072572a87 100644 --- a/drivers/pwm/pwm-ep93xx.c +++ b/drivers/pwm/pwm-ep93xx.c @@ -159,7 +159,6 @@ static const struct pwm_ops ep93xx_pwm_ops = { .request = ep93xx_pwm_request, .free = ep93xx_pwm_free, .apply = ep93xx_pwm_apply, - .owner = THIS_MODULE, }; static int ep93xx_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c index b7c6045c5d08..d1b6d1aa4773 100644 --- a/drivers/pwm/pwm-fsl-ftm.c +++ b/drivers/pwm/pwm-fsl-ftm.c @@ -350,7 +350,6 @@ static const struct pwm_ops fsl_pwm_ops = { .request = fsl_pwm_request, .free = fsl_pwm_free, .apply = fsl_pwm_apply, - .owner = THIS_MODULE, }; static int fsl_pwm_init(struct fsl_pwm_chip *fpc) diff --git a/drivers/pwm/pwm-hibvt.c b/drivers/pwm/pwm-hibvt.c index f7ba6fe9a349..c435776e2f78 100644 --- a/drivers/pwm/pwm-hibvt.c +++ b/drivers/pwm/pwm-hibvt.c @@ -185,7 +185,6 @@ static const struct pwm_ops hibvt_pwm_ops = { .get_state = hibvt_pwm_get_state, .apply = hibvt_pwm_apply, - .owner = THIS_MODULE, }; static int hibvt_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-img.c b/drivers/pwm/pwm-img.c index 326af85888e7..116fa060e302 100644 --- a/drivers/pwm/pwm-img.c +++ b/drivers/pwm/pwm-img.c @@ -208,7 +208,6 @@ static int img_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops img_pwm_ops = { .apply = img_pwm_apply, - .owner = THIS_MODULE, }; static const struct img_pwm_soc_data pistachio_pwm = { diff --git a/drivers/pwm/pwm-imx-tpm.c b/drivers/pwm/pwm-imx-tpm.c index 98ab65c89685..dc6aafeb9f7b 100644 --- a/drivers/pwm/pwm-imx-tpm.c +++ b/drivers/pwm/pwm-imx-tpm.c @@ -332,7 +332,6 @@ static const struct pwm_ops imx_tpm_pwm_ops = { .free = pwm_imx_tpm_free, .get_state = pwm_imx_tpm_get_state, .apply = pwm_imx_tpm_apply, - .owner = THIS_MODULE, }; static int pwm_imx_tpm_probe(struct platform_device *pdev) @@ -351,18 +350,11 @@ static int pwm_imx_tpm_probe(struct platform_device *pdev) if (IS_ERR(tpm->base)) return PTR_ERR(tpm->base); - tpm->clk = 
devm_clk_get(&pdev->dev, NULL); + tpm->clk = devm_clk_get_enabled(&pdev->dev, NULL); if (IS_ERR(tpm->clk)) return dev_err_probe(&pdev->dev, PTR_ERR(tpm->clk), "failed to get PWM clock\n"); - ret = clk_prepare_enable(tpm->clk); - if (ret) { - dev_err(&pdev->dev, - "failed to prepare or enable clock: %d\n", ret); - return ret; - } - tpm->chip.dev = &pdev->dev; tpm->chip.ops = &imx_tpm_pwm_ops; @@ -372,22 +364,11 @@ static int pwm_imx_tpm_probe(struct platform_device *pdev) mutex_init(&tpm->lock); - ret = pwmchip_add(&tpm->chip); - if (ret) { - dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret); - clk_disable_unprepare(tpm->clk); - } - - return ret; -} - -static void pwm_imx_tpm_remove(struct platform_device *pdev) -{ - struct imx_tpm_pwm_chip *tpm = platform_get_drvdata(pdev); - - pwmchip_remove(&tpm->chip); + ret = devm_pwmchip_add(&pdev->dev, &tpm->chip); + if (ret) + return dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n"); - clk_disable_unprepare(tpm->clk); + return 0; } static int __maybe_unused pwm_imx_tpm_suspend(struct device *dev) @@ -437,7 +418,6 @@ static struct platform_driver imx_tpm_pwm_driver = { .pm = &imx_tpm_pwm_pm, }, .probe = pwm_imx_tpm_probe, - .remove_new = pwm_imx_tpm_remove, }; module_platform_driver(imx_tpm_pwm_driver); diff --git a/drivers/pwm/pwm-imx1.c b/drivers/pwm/pwm-imx1.c index 0651983bed19..d175d895f22a 100644 --- a/drivers/pwm/pwm-imx1.c +++ b/drivers/pwm/pwm-imx1.c @@ -146,7 +146,6 @@ static int pwm_imx1_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops pwm_imx1_ops = { .apply = pwm_imx1_apply, - .owner = THIS_MODULE, }; static const struct of_device_id pwm_imx1_dt_ids[] = { diff --git a/drivers/pwm/pwm-imx27.c b/drivers/pwm/pwm-imx27.c index 29a3089c534c..7d9bc43f12b0 100644 --- a/drivers/pwm/pwm-imx27.c +++ b/drivers/pwm/pwm-imx27.c @@ -296,7 +296,6 @@ static int pwm_imx27_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops pwm_imx27_ops = { .apply = pwm_imx27_apply, .get_state = pwm_imx27_get_state, - .owner = THIS_MODULE, }; static const struct of_device_id pwm_imx27_dt_ids[] = { diff --git a/drivers/pwm/pwm-intel-lgm.c b/drivers/pwm/pwm-intel-lgm.c index 0cd7dd548e82..54ecae7f937e 100644 --- a/drivers/pwm/pwm-intel-lgm.c +++ b/drivers/pwm/pwm-intel-lgm.c @@ -107,7 +107,6 @@ static int lgm_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops lgm_pwm_ops = { .get_state = lgm_pwm_get_state, .apply = lgm_pwm_apply, - .owner = THIS_MODULE, }; static void lgm_pwm_init(struct lgm_pwm_chip *pc) diff --git a/drivers/pwm/pwm-iqs620a.c b/drivers/pwm/pwm-iqs620a.c index 47b3141135f3..378ab036edfe 100644 --- a/drivers/pwm/pwm-iqs620a.c +++ b/drivers/pwm/pwm-iqs620a.c @@ -166,7 +166,6 @@ static int iqs620_pwm_notifier(struct notifier_block *notifier, static const struct pwm_ops iqs620_pwm_ops = { .apply = iqs620_pwm_apply, .get_state = iqs620_pwm_get_state, - .owner = THIS_MODULE, }; static void iqs620_pwm_notifier_unregister(void *context) diff --git a/drivers/pwm/pwm-jz4740.c b/drivers/pwm/pwm-jz4740.c index ef1293f2a897..e9375de60ad6 100644 --- a/drivers/pwm/pwm-jz4740.c +++ b/drivers/pwm/pwm-jz4740.c @@ -27,6 +27,7 @@ struct soc_info { struct jz4740_pwm_chip { struct pwm_chip chip; struct regmap *map; + struct clk *clk[]; }; static inline struct jz4740_pwm_chip *to_jz4740(struct pwm_chip *chip) @@ -70,14 +71,15 @@ static int jz4740_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) return err; } - pwm_set_chip_data(pwm, clk); + 
jz->clk[pwm->hwpwm] = clk; return 0; } static void jz4740_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) { - struct clk *clk = pwm_get_chip_data(pwm); + struct jz4740_pwm_chip *jz = to_jz4740(chip); + struct clk *clk = jz->clk[pwm->hwpwm]; clk_disable_unprepare(clk); clk_put(clk); @@ -121,9 +123,9 @@ static void jz4740_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) static int jz4740_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, const struct pwm_state *state) { - struct jz4740_pwm_chip *jz4740 = to_jz4740(pwm->chip); + struct jz4740_pwm_chip *jz = to_jz4740(pwm->chip); unsigned long long tmp = 0xffffull * NSEC_PER_SEC; - struct clk *clk = pwm_get_chip_data(pwm); + struct clk *clk = jz->clk[pwm->hwpwm]; unsigned long period, duty; long rate; int err; @@ -173,16 +175,16 @@ static int jz4740_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, } /* Reset counter to 0 */ - regmap_write(jz4740->map, TCU_REG_TCNTc(pwm->hwpwm), 0); + regmap_write(jz->map, TCU_REG_TCNTc(pwm->hwpwm), 0); /* Set duty */ - regmap_write(jz4740->map, TCU_REG_TDHRc(pwm->hwpwm), duty); + regmap_write(jz->map, TCU_REG_TDHRc(pwm->hwpwm), duty); /* Set period */ - regmap_write(jz4740->map, TCU_REG_TDFRc(pwm->hwpwm), period); + regmap_write(jz->map, TCU_REG_TDFRc(pwm->hwpwm), period); /* Set abrupt shutdown */ - regmap_set_bits(jz4740->map, TCU_REG_TCSRc(pwm->hwpwm), + regmap_set_bits(jz->map, TCU_REG_TCSRc(pwm->hwpwm), TCU_TCSR_PWM_SD); /* @@ -199,10 +201,10 @@ static int jz4740_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, * state instead of its inactive state. */ if ((state->polarity == PWM_POLARITY_NORMAL) ^ state->enabled) - regmap_update_bits(jz4740->map, TCU_REG_TCSRc(pwm->hwpwm), + regmap_update_bits(jz->map, TCU_REG_TCSRc(pwm->hwpwm), TCU_TCSR_PWM_INITL_HIGH, 0); else - regmap_update_bits(jz4740->map, TCU_REG_TCSRc(pwm->hwpwm), + regmap_update_bits(jz->map, TCU_REG_TCSRc(pwm->hwpwm), TCU_TCSR_PWM_INITL_HIGH, TCU_TCSR_PWM_INITL_HIGH); @@ -216,34 +218,34 @@ static const struct pwm_ops jz4740_pwm_ops = { .request = jz4740_pwm_request, .free = jz4740_pwm_free, .apply = jz4740_pwm_apply, - .owner = THIS_MODULE, }; static int jz4740_pwm_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct jz4740_pwm_chip *jz4740; + struct jz4740_pwm_chip *jz; const struct soc_info *info; info = device_get_match_data(dev); if (!info) return -EINVAL; - jz4740 = devm_kzalloc(dev, sizeof(*jz4740), GFP_KERNEL); - if (!jz4740) + jz = devm_kzalloc(dev, struct_size(jz, clk, info->num_pwms), + GFP_KERNEL); + if (!jz) return -ENOMEM; - jz4740->map = device_node_to_regmap(dev->parent->of_node); - if (IS_ERR(jz4740->map)) { - dev_err(dev, "regmap not found: %ld\n", PTR_ERR(jz4740->map)); - return PTR_ERR(jz4740->map); + jz->map = device_node_to_regmap(dev->parent->of_node); + if (IS_ERR(jz->map)) { + dev_err(dev, "regmap not found: %ld\n", PTR_ERR(jz->map)); + return PTR_ERR(jz->map); } - jz4740->chip.dev = dev; - jz4740->chip.ops = &jz4740_pwm_ops; - jz4740->chip.npwm = info->num_pwms; + jz->chip.dev = dev; + jz->chip.ops = &jz4740_pwm_ops; + jz->chip.npwm = info->num_pwms; - return devm_pwmchip_add(dev, &jz4740->chip); + return devm_pwmchip_add(dev, &jz->chip); } static const struct soc_info jz4740_soc_info = { diff --git a/drivers/pwm/pwm-keembay.c b/drivers/pwm/pwm-keembay.c index ac02d8bb4a0b..ac824ecc3f64 100644 --- a/drivers/pwm/pwm-keembay.c +++ b/drivers/pwm/pwm-keembay.c @@ -178,7 +178,6 @@ static int keembay_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, } 
static const struct pwm_ops keembay_pwm_ops = { - .owner = THIS_MODULE, .apply = keembay_pwm_apply, .get_state = keembay_pwm_get_state, }; diff --git a/drivers/pwm/pwm-lp3943.c b/drivers/pwm/pwm-lp3943.c index 4b133a17f4be..32350a357278 100644 --- a/drivers/pwm/pwm-lp3943.c +++ b/drivers/pwm/pwm-lp3943.c @@ -23,6 +23,7 @@ struct lp3943_pwm { struct pwm_chip chip; struct lp3943 *lp3943; struct lp3943_platform_data *pdata; + struct lp3943_pwm_map pwm_map[LP3943_NUM_PWMS]; }; static inline struct lp3943_pwm *to_lp3943_pwm(struct pwm_chip *chip) @@ -35,13 +36,9 @@ lp3943_pwm_request_map(struct lp3943_pwm *lp3943_pwm, int hwpwm) { struct lp3943_platform_data *pdata = lp3943_pwm->pdata; struct lp3943 *lp3943 = lp3943_pwm->lp3943; - struct lp3943_pwm_map *pwm_map; + struct lp3943_pwm_map *pwm_map = &lp3943_pwm->pwm_map[hwpwm]; int i, offset; - pwm_map = kzalloc(sizeof(*pwm_map), GFP_KERNEL); - if (!pwm_map) - return ERR_PTR(-ENOMEM); - pwm_map->output = pdata->pwms[hwpwm]->output; pwm_map->num_outputs = pdata->pwms[hwpwm]->num_outputs; @@ -49,10 +46,8 @@ lp3943_pwm_request_map(struct lp3943_pwm *lp3943_pwm, int hwpwm) offset = pwm_map->output[i]; /* Return an error if the pin is already assigned */ - if (test_and_set_bit(offset, &lp3943->pin_used)) { - kfree(pwm_map); + if (test_and_set_bit(offset, &lp3943->pin_used)) return ERR_PTR(-EBUSY); - } } return pwm_map; @@ -67,7 +62,7 @@ static int lp3943_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) if (IS_ERR(pwm_map)) return PTR_ERR(pwm_map); - return pwm_set_chip_data(pwm, pwm_map); + return 0; } static void lp3943_pwm_free_map(struct lp3943_pwm *lp3943_pwm, @@ -80,14 +75,12 @@ static void lp3943_pwm_free_map(struct lp3943_pwm *lp3943_pwm, offset = pwm_map->output[i]; clear_bit(offset, &lp3943->pin_used); } - - kfree(pwm_map); } static void lp3943_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) { struct lp3943_pwm *lp3943_pwm = to_lp3943_pwm(chip); - struct lp3943_pwm_map *pwm_map = pwm_get_chip_data(pwm); + struct lp3943_pwm_map *pwm_map = &lp3943_pwm->pwm_map[pwm->hwpwm]; lp3943_pwm_free_map(lp3943_pwm, pwm_map); } @@ -159,7 +152,7 @@ static int lp3943_pwm_set_mode(struct lp3943_pwm *lp3943_pwm, static int lp3943_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) { struct lp3943_pwm *lp3943_pwm = to_lp3943_pwm(chip); - struct lp3943_pwm_map *pwm_map = pwm_get_chip_data(pwm); + struct lp3943_pwm_map *pwm_map = &lp3943_pwm->pwm_map[pwm->hwpwm]; u8 val; if (pwm->hwpwm == 0) @@ -178,7 +171,7 @@ static int lp3943_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) static void lp3943_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) { struct lp3943_pwm *lp3943_pwm = to_lp3943_pwm(chip); - struct lp3943_pwm_map *pwm_map = pwm_get_chip_data(pwm); + struct lp3943_pwm_map *pwm_map = &lp3943_pwm->pwm_map[pwm->hwpwm]; /* * LP3943 outputs are open-drain, so the pin should be configured @@ -216,7 +209,6 @@ static const struct pwm_ops lp3943_pwm_ops = { .request = lp3943_pwm_request, .free = lp3943_pwm_free, .apply = lp3943_pwm_apply, - .owner = THIS_MODULE, }; static int lp3943_pwm_parse_dt(struct device *dev, diff --git a/drivers/pwm/pwm-lpc18xx-sct.c b/drivers/pwm/pwm-lpc18xx-sct.c index 7a19a840bca5..ef7d0da137ed 100644 --- a/drivers/pwm/pwm-lpc18xx-sct.c +++ b/drivers/pwm/pwm-lpc18xx-sct.c @@ -341,7 +341,6 @@ static const struct pwm_ops lpc18xx_pwm_ops = { .apply = lpc18xx_pwm_apply, .request = lpc18xx_pwm_request, .free = lpc18xx_pwm_free, - .owner = THIS_MODULE, }; static const struct of_device_id 
lpc18xx_pwm_of_match[] = { diff --git a/drivers/pwm/pwm-lpc32xx.c b/drivers/pwm/pwm-lpc32xx.c index 806f0bb3ad6d..78f664e41e6e 100644 --- a/drivers/pwm/pwm-lpc32xx.c +++ b/drivers/pwm/pwm-lpc32xx.c @@ -115,7 +115,6 @@ static int lpc32xx_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops lpc32xx_pwm_ops = { .apply = lpc32xx_pwm_apply, - .owner = THIS_MODULE, }; static int lpc32xx_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c index 23fe332b2394..a6ea3ce7e019 100644 --- a/drivers/pwm/pwm-lpss.c +++ b/drivers/pwm/pwm-lpss.c @@ -243,7 +243,6 @@ static int pwm_lpss_get_state(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops pwm_lpss_ops = { .apply = pwm_lpss_apply, .get_state = pwm_lpss_get_state, - .owner = THIS_MODULE, }; struct pwm_lpss_chip *devm_pwm_lpss_probe(struct device *dev, void __iomem *base, diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c index 6adb0ed01906..373abfd25acb 100644 --- a/drivers/pwm/pwm-mediatek.c +++ b/drivers/pwm/pwm-mediatek.c @@ -229,7 +229,6 @@ static int pwm_mediatek_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops pwm_mediatek_ops = { .apply = pwm_mediatek_apply, - .owner = THIS_MODULE, }; static int pwm_mediatek_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index 25519cddc2a9..5bea53243ed2 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -335,7 +335,6 @@ static const struct pwm_ops meson_pwm_ops = { .free = meson_pwm_free, .apply = meson_pwm_apply, .get_state = meson_pwm_get_state, - .owner = THIS_MODULE, }; static const char * const pwm_meson8b_parent_names[] = { diff --git a/drivers/pwm/pwm-microchip-core.c b/drivers/pwm/pwm-microchip-core.c index e7525c98105e..c0c53968f3e9 100644 --- a/drivers/pwm/pwm-microchip-core.c +++ b/drivers/pwm/pwm-microchip-core.c @@ -435,7 +435,6 @@ static int mchp_core_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm static const struct pwm_ops mchp_core_pwm_ops = { .apply = mchp_core_pwm_apply, .get_state = mchp_core_pwm_get_state, - .owner = THIS_MODULE, }; static const struct of_device_id mchp_core_of_match[] = { diff --git a/drivers/pwm/pwm-mtk-disp.c b/drivers/pwm/pwm-mtk-disp.c index a83bd6e18b07..a72f7be36996 100644 --- a/drivers/pwm/pwm-mtk-disp.c +++ b/drivers/pwm/pwm-mtk-disp.c @@ -227,7 +227,6 @@ static int mtk_disp_pwm_get_state(struct pwm_chip *chip, static const struct pwm_ops mtk_disp_pwm_ops = { .apply = mtk_disp_pwm_apply, .get_state = mtk_disp_pwm_get_state, - .owner = THIS_MODULE, }; static int mtk_disp_pwm_probe(struct platform_device *pdev) @@ -247,34 +246,25 @@ static int mtk_disp_pwm_probe(struct platform_device *pdev) mdp->clk_main = devm_clk_get(&pdev->dev, "main"); if (IS_ERR(mdp->clk_main)) - return PTR_ERR(mdp->clk_main); + return dev_err_probe(&pdev->dev, PTR_ERR(mdp->clk_main), + "Failed to get main clock\n"); mdp->clk_mm = devm_clk_get(&pdev->dev, "mm"); if (IS_ERR(mdp->clk_mm)) - return PTR_ERR(mdp->clk_mm); + return dev_err_probe(&pdev->dev, PTR_ERR(mdp->clk_mm), + "Failed to get mm clock\n"); mdp->chip.dev = &pdev->dev; mdp->chip.ops = &mtk_disp_pwm_ops; mdp->chip.npwm = 1; - ret = pwmchip_add(&mdp->chip); - if (ret < 0) { - dev_err(&pdev->dev, "pwmchip_add() failed: %pe\n", ERR_PTR(ret)); - return ret; - } - - platform_set_drvdata(pdev, mdp); + ret = devm_pwmchip_add(&pdev->dev, &mdp->chip); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, 
"pwmchip_add() failed\n"); return 0; } -static void mtk_disp_pwm_remove(struct platform_device *pdev) -{ - struct mtk_disp_pwm *mdp = platform_get_drvdata(pdev); - - pwmchip_remove(&mdp->chip); -} - static const struct mtk_pwm_data mt2701_pwm_data = { .enable_mask = BIT(16), .con0 = 0xa8, @@ -320,7 +310,6 @@ static struct platform_driver mtk_disp_pwm_driver = { .of_match_table = mtk_disp_pwm_of_match, }, .probe = mtk_disp_pwm_probe, - .remove_new = mtk_disp_pwm_remove, }; module_platform_driver(mtk_disp_pwm_driver); diff --git a/drivers/pwm/pwm-mxs.c b/drivers/pwm/pwm-mxs.c index 766dbc58dad8..1b5e787d78f1 100644 --- a/drivers/pwm/pwm-mxs.c +++ b/drivers/pwm/pwm-mxs.c @@ -115,7 +115,6 @@ static int mxs_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops mxs_pwm_ops = { .apply = mxs_pwm_apply, - .owner = THIS_MODULE, }; static int mxs_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-ntxec.c b/drivers/pwm/pwm-ntxec.c index 7514ea384ec5..78606039eda2 100644 --- a/drivers/pwm/pwm-ntxec.c +++ b/drivers/pwm/pwm-ntxec.c @@ -126,7 +126,6 @@ static int ntxec_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm_dev, } static const struct pwm_ops ntxec_pwm_ops = { - .owner = THIS_MODULE, .apply = ntxec_pwm_apply, /* * No .get_state callback, because the current state cannot be read diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c index 4889fbd8a431..13161e08dd6e 100644 --- a/drivers/pwm/pwm-omap-dmtimer.c +++ b/drivers/pwm/pwm-omap-dmtimer.c @@ -311,7 +311,6 @@ unlock_mutex: static const struct pwm_ops pwm_omap_dmtimer_ops = { .apply = pwm_omap_dmtimer_apply, - .owner = THIS_MODULE, }; static int pwm_omap_dmtimer_probe(struct platform_device *pdev) @@ -466,7 +465,7 @@ MODULE_DEVICE_TABLE(of, pwm_omap_dmtimer_of_match); static struct platform_driver pwm_omap_dmtimer_driver = { .driver = { .name = "omap-dmtimer-pwm", - .of_match_table = of_match_ptr(pwm_omap_dmtimer_of_match), + .of_match_table = pwm_omap_dmtimer_of_match, }, .probe = pwm_omap_dmtimer_probe, .remove_new = pwm_omap_dmtimer_remove, diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c index 3038a68412a7..e79b1de8c4d8 100644 --- a/drivers/pwm/pwm-pca9685.c +++ b/drivers/pwm/pwm-pca9685.c @@ -505,7 +505,6 @@ static const struct pwm_ops pca9685_pwm_ops = { .get_state = pca9685_pwm_get_state, .request = pca9685_pwm_request, .free = pca9685_pwm_free, - .owner = THIS_MODULE, }; static const struct regmap_config pca9685_regmap_i2c_config = { diff --git a/drivers/pwm/pwm-pxa.c b/drivers/pwm/pwm-pxa.c index 1e475ed10180..76685f926c75 100644 --- a/drivers/pwm/pwm-pxa.c +++ b/drivers/pwm/pwm-pxa.c @@ -24,7 +24,7 @@ #include <linux/clk.h> #include <linux/io.h> #include <linux/pwm.h> -#include <linux/of_device.h> +#include <linux/of.h> #include <asm/div64.h> @@ -135,7 +135,6 @@ static int pxa_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops pxa_pwm_ops = { .apply = pxa_pwm_apply, - .owner = THIS_MODULE, }; #ifdef CONFIG_OF diff --git a/drivers/pwm/pwm-raspberrypi-poe.c b/drivers/pwm/pwm-raspberrypi-poe.c index 2939b71a7ba7..1ad814fdec6b 100644 --- a/drivers/pwm/pwm-raspberrypi-poe.c +++ b/drivers/pwm/pwm-raspberrypi-poe.c @@ -135,7 +135,6 @@ static int raspberrypi_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops raspberrypi_pwm_ops = { .get_state = raspberrypi_pwm_get_state, .apply = raspberrypi_pwm_apply, - .owner = THIS_MODULE, }; static int raspberrypi_pwm_probe(struct 
platform_device *pdev) diff --git a/drivers/pwm/pwm-rcar.c b/drivers/pwm/pwm-rcar.c index 5b5f357c44de..13269f55fccf 100644 --- a/drivers/pwm/pwm-rcar.c +++ b/drivers/pwm/pwm-rcar.c @@ -198,7 +198,6 @@ static const struct pwm_ops rcar_pwm_ops = { .request = rcar_pwm_request, .free = rcar_pwm_free, .apply = rcar_pwm_apply, - .owner = THIS_MODULE, }; static int rcar_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-renesas-tpu.c b/drivers/pwm/pwm-renesas-tpu.c index d7311614c846..4239f2c3e8b2 100644 --- a/drivers/pwm/pwm-renesas-tpu.c +++ b/drivers/pwm/pwm-renesas-tpu.c @@ -85,6 +85,7 @@ struct tpu_device { void __iomem *base; struct clk *clk; + struct tpu_pwm_device tpd[TPU_CHANNEL_MAX]; }; #define to_tpu_device(c) container_of(c, struct tpu_device, chip) @@ -215,9 +216,7 @@ static int tpu_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) if (pwm->hwpwm >= TPU_CHANNEL_MAX) return -EINVAL; - tpd = kzalloc(sizeof(*tpd), GFP_KERNEL); - if (tpd == NULL) - return -ENOMEM; + tpd = &tpu->tpd[pwm->hwpwm]; tpd->tpu = tpu; tpd->channel = pwm->hwpwm; @@ -228,24 +227,22 @@ static int tpu_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) tpd->timer_on = false; - pwm_set_chip_data(pwm, tpd); - return 0; } static void tpu_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) { - struct tpu_pwm_device *tpd = pwm_get_chip_data(pwm); + struct tpu_device *tpu = to_tpu_device(chip); + struct tpu_pwm_device *tpd = &tpu->tpd[pwm->hwpwm]; tpu_pwm_timer_stop(tpd); - kfree(tpd); } static int tpu_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, u64 duty_ns, u64 period_ns, bool enabled) { - struct tpu_pwm_device *tpd = pwm_get_chip_data(pwm); struct tpu_device *tpu = to_tpu_device(chip); + struct tpu_pwm_device *tpd = &tpu->tpd[pwm->hwpwm]; unsigned int prescaler; bool duty_only = false; u32 clk_rate; @@ -353,7 +350,8 @@ static int tpu_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, static int tpu_pwm_set_polarity(struct pwm_chip *chip, struct pwm_device *pwm, enum pwm_polarity polarity) { - struct tpu_pwm_device *tpd = pwm_get_chip_data(pwm); + struct tpu_device *tpu = to_tpu_device(chip); + struct tpu_pwm_device *tpd = &tpu->tpd[pwm->hwpwm]; tpd->polarity = polarity; @@ -362,7 +360,8 @@ static int tpu_pwm_set_polarity(struct pwm_chip *chip, struct pwm_device *pwm, static int tpu_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) { - struct tpu_pwm_device *tpd = pwm_get_chip_data(pwm); + struct tpu_device *tpu = to_tpu_device(chip); + struct tpu_pwm_device *tpd = &tpu->tpd[pwm->hwpwm]; int ret; ret = tpu_pwm_timer_start(tpd); @@ -384,7 +383,8 @@ static int tpu_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) static void tpu_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) { - struct tpu_pwm_device *tpd = pwm_get_chip_data(pwm); + struct tpu_device *tpu = to_tpu_device(chip); + struct tpu_pwm_device *tpd = &tpu->tpd[pwm->hwpwm]; /* The timer must be running to modify the pin output configuration. 
*/ tpu_pwm_timer_start(tpd); @@ -431,7 +431,6 @@ static const struct pwm_ops tpu_pwm_ops = { .request = tpu_pwm_request, .free = tpu_pwm_free, .apply = tpu_pwm_apply, - .owner = THIS_MODULE, }; /* ----------------------------------------------------------------------------- diff --git a/drivers/pwm/pwm-rockchip.c b/drivers/pwm/pwm-rockchip.c index 03ee18fb82d5..cce4381e188a 100644 --- a/drivers/pwm/pwm-rockchip.c +++ b/drivers/pwm/pwm-rockchip.c @@ -228,7 +228,6 @@ out: static const struct pwm_ops rockchip_pwm_ops = { .get_state = rockchip_pwm_get_state, .apply = rockchip_pwm_apply, - .owner = THIS_MODULE, }; static const struct rockchip_pwm_data pwm_data_v1 = { diff --git a/drivers/pwm/pwm-rz-mtu3.c b/drivers/pwm/pwm-rz-mtu3.c index a56cecb0e46e..bdda315b3bd3 100644 --- a/drivers/pwm/pwm-rz-mtu3.c +++ b/drivers/pwm/pwm-rz-mtu3.c @@ -438,7 +438,6 @@ static const struct pwm_ops rz_mtu3_pwm_ops = { .free = rz_mtu3_pwm_free, .get_state = rz_mtu3_pwm_get_state, .apply = rz_mtu3_pwm_apply, - .owner = THIS_MODULE, }; static int rz_mtu3_pwm_pm_runtime_suspend(struct device *dev) diff --git a/drivers/pwm/pwm-samsung.c b/drivers/pwm/pwm-samsung.c index e8828f57ab15..69d9f4577b34 100644 --- a/drivers/pwm/pwm-samsung.c +++ b/drivers/pwm/pwm-samsung.c @@ -77,6 +77,7 @@ struct samsung_pwm_channel { * @base_clk: base clock used to drive the timers * @tclk0: external clock 0 (can be ERR_PTR if not present) * @tclk1: external clock 1 (can be ERR_PTR if not present) + * @channel: per channel driver data */ struct samsung_pwm_chip { struct pwm_chip chip; @@ -88,6 +89,7 @@ struct samsung_pwm_chip { struct clk *base_clk; struct clk *tclk0; struct clk *tclk1; + struct samsung_pwm_channel channel[SAMSUNG_PWM_NUM]; }; #ifndef CONFIG_CLKSRC_SAMSUNG_PWM @@ -117,21 +119,21 @@ static inline unsigned int to_tcon_channel(unsigned int channel) return (channel == 0) ? 
0 : (channel + 1); } -static void __pwm_samsung_manual_update(struct samsung_pwm_chip *chip, +static void __pwm_samsung_manual_update(struct samsung_pwm_chip *our_chip, struct pwm_device *pwm) { unsigned int tcon_chan = to_tcon_channel(pwm->hwpwm); u32 tcon; - tcon = readl(chip->base + REG_TCON); + tcon = readl(our_chip->base + REG_TCON); tcon |= TCON_MANUALUPDATE(tcon_chan); - writel(tcon, chip->base + REG_TCON); + writel(tcon, our_chip->base + REG_TCON); tcon &= ~TCON_MANUALUPDATE(tcon_chan); - writel(tcon, chip->base + REG_TCON); + writel(tcon, our_chip->base + REG_TCON); } -static void pwm_samsung_set_divisor(struct samsung_pwm_chip *pwm, +static void pwm_samsung_set_divisor(struct samsung_pwm_chip *our_chip, unsigned int channel, u8 divisor) { u8 shift = TCFG1_SHIFT(channel); @@ -139,39 +141,39 @@ static void pwm_samsung_set_divisor(struct samsung_pwm_chip *pwm, u32 reg; u8 bits; - bits = (fls(divisor) - 1) - pwm->variant.div_base; + bits = (fls(divisor) - 1) - our_chip->variant.div_base; spin_lock_irqsave(&samsung_pwm_lock, flags); - reg = readl(pwm->base + REG_TCFG1); + reg = readl(our_chip->base + REG_TCFG1); reg &= ~(TCFG1_MUX_MASK << shift); reg |= bits << shift; - writel(reg, pwm->base + REG_TCFG1); + writel(reg, our_chip->base + REG_TCFG1); spin_unlock_irqrestore(&samsung_pwm_lock, flags); } -static int pwm_samsung_is_tdiv(struct samsung_pwm_chip *chip, unsigned int chan) +static int pwm_samsung_is_tdiv(struct samsung_pwm_chip *our_chip, unsigned int chan) { - struct samsung_pwm_variant *variant = &chip->variant; + struct samsung_pwm_variant *variant = &our_chip->variant; u32 reg; - reg = readl(chip->base + REG_TCFG1); + reg = readl(our_chip->base + REG_TCFG1); reg >>= TCFG1_SHIFT(chan); reg &= TCFG1_MUX_MASK; return (BIT(reg) & variant->tclk_mask) == 0; } -static unsigned long pwm_samsung_get_tin_rate(struct samsung_pwm_chip *chip, +static unsigned long pwm_samsung_get_tin_rate(struct samsung_pwm_chip *our_chip, unsigned int chan) { unsigned long rate; u32 reg; - rate = clk_get_rate(chip->base_clk); + rate = clk_get_rate(our_chip->base_clk); - reg = readl(chip->base + REG_TCFG0); + reg = readl(our_chip->base + REG_TCFG0); if (chan >= 2) reg >>= TCFG0_PRESCALER1_SHIFT; reg &= TCFG0_PRESCALER_MASK; @@ -179,28 +181,28 @@ static unsigned long pwm_samsung_get_tin_rate(struct samsung_pwm_chip *chip, return rate / (reg + 1); } -static unsigned long pwm_samsung_calc_tin(struct samsung_pwm_chip *chip, +static unsigned long pwm_samsung_calc_tin(struct samsung_pwm_chip *our_chip, unsigned int chan, unsigned long freq) { - struct samsung_pwm_variant *variant = &chip->variant; + struct samsung_pwm_variant *variant = &our_chip->variant; unsigned long rate; struct clk *clk; u8 div; - if (!pwm_samsung_is_tdiv(chip, chan)) { - clk = (chan < 2) ? chip->tclk0 : chip->tclk1; + if (!pwm_samsung_is_tdiv(our_chip, chan)) { + clk = (chan < 2) ? 
our_chip->tclk0 : our_chip->tclk1; if (!IS_ERR(clk)) { rate = clk_get_rate(clk); if (rate) return rate; } - dev_warn(chip->chip.dev, + dev_warn(our_chip->chip.dev, "tclk of PWM %d is inoperational, using tdiv\n", chan); } - rate = pwm_samsung_get_tin_rate(chip, chan); - dev_dbg(chip->chip.dev, "tin parent at %lu\n", rate); + rate = pwm_samsung_get_tin_rate(our_chip, chan); + dev_dbg(our_chip->chip.dev, "tin parent at %lu\n", rate); /* * Compare minimum PWM frequency that can be achieved with possible @@ -220,7 +222,7 @@ static unsigned long pwm_samsung_calc_tin(struct samsung_pwm_chip *chip, div = variant->div_base; } - pwm_samsung_set_divisor(chip, chan, BIT(div)); + pwm_samsung_set_divisor(our_chip, chan, BIT(div)); return rate >> div; } @@ -228,7 +230,6 @@ static unsigned long pwm_samsung_calc_tin(struct samsung_pwm_chip *chip, static int pwm_samsung_request(struct pwm_chip *chip, struct pwm_device *pwm) { struct samsung_pwm_chip *our_chip = to_samsung_pwm_chip(chip); - struct samsung_pwm_channel *our_chan; if (!(our_chip->variant.output_mask & BIT(pwm->hwpwm))) { dev_warn(chip->dev, @@ -237,20 +238,11 @@ static int pwm_samsung_request(struct pwm_chip *chip, struct pwm_device *pwm) return -EINVAL; } - our_chan = kzalloc(sizeof(*our_chan), GFP_KERNEL); - if (!our_chan) - return -ENOMEM; - - pwm_set_chip_data(pwm, our_chan); + memset(&our_chip->channel[pwm->hwpwm], 0, sizeof(our_chip->channel[pwm->hwpwm])); return 0; } -static void pwm_samsung_free(struct pwm_chip *chip, struct pwm_device *pwm) -{ - kfree(pwm_get_chip_data(pwm)); -} - static int pwm_samsung_enable(struct pwm_chip *chip, struct pwm_device *pwm) { struct samsung_pwm_chip *our_chip = to_samsung_pwm_chip(chip); @@ -302,14 +294,14 @@ static void pwm_samsung_disable(struct pwm_chip *chip, struct pwm_device *pwm) spin_unlock_irqrestore(&samsung_pwm_lock, flags); } -static void pwm_samsung_manual_update(struct samsung_pwm_chip *chip, +static void pwm_samsung_manual_update(struct samsung_pwm_chip *our_chip, struct pwm_device *pwm) { unsigned long flags; spin_lock_irqsave(&samsung_pwm_lock, flags); - __pwm_samsung_manual_update(chip, pwm); + __pwm_samsung_manual_update(our_chip, pwm); spin_unlock_irqrestore(&samsung_pwm_lock, flags); } @@ -318,7 +310,7 @@ static int __pwm_samsung_config(struct pwm_chip *chip, struct pwm_device *pwm, int duty_ns, int period_ns, bool force_period) { struct samsung_pwm_chip *our_chip = to_samsung_pwm_chip(chip); - struct samsung_pwm_channel *chan = pwm_get_chip_data(pwm); + struct samsung_pwm_channel *chan = &our_chip->channel[pwm->hwpwm]; u32 tin_ns = chan->tin_ns, tcnt, tcmp, oldtcmp; tcnt = readl(our_chip->base + REG_TCNTB(pwm->hwpwm)); @@ -393,7 +385,7 @@ static int pwm_samsung_config(struct pwm_chip *chip, struct pwm_device *pwm, return __pwm_samsung_config(chip, pwm, duty_ns, period_ns, false); } -static void pwm_samsung_set_invert(struct samsung_pwm_chip *chip, +static void pwm_samsung_set_invert(struct samsung_pwm_chip *our_chip, unsigned int channel, bool invert) { unsigned int tcon_chan = to_tcon_channel(channel); @@ -402,17 +394,17 @@ static void pwm_samsung_set_invert(struct samsung_pwm_chip *chip, spin_lock_irqsave(&samsung_pwm_lock, flags); - tcon = readl(chip->base + REG_TCON); + tcon = readl(our_chip->base + REG_TCON); if (invert) { - chip->inverter_mask |= BIT(channel); + our_chip->inverter_mask |= BIT(channel); tcon |= TCON_INVERT(tcon_chan); } else { - chip->inverter_mask &= ~BIT(channel); + our_chip->inverter_mask &= ~BIT(channel); tcon &= ~TCON_INVERT(tcon_chan); } - writel(tcon, 
chip->base + REG_TCON); + writel(tcon, our_chip->base + REG_TCON); spin_unlock_irqrestore(&samsung_pwm_lock, flags); } @@ -473,9 +465,7 @@ static int pwm_samsung_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops pwm_samsung_ops = { .request = pwm_samsung_request, - .free = pwm_samsung_free, .apply = pwm_samsung_apply, - .owner = THIS_MODULE, }; #ifdef CONFIG_OF @@ -517,9 +507,9 @@ static const struct of_device_id samsung_pwm_matches[] = { }; MODULE_DEVICE_TABLE(of, samsung_pwm_matches); -static int pwm_samsung_parse_dt(struct samsung_pwm_chip *chip) +static int pwm_samsung_parse_dt(struct samsung_pwm_chip *our_chip) { - struct device_node *np = chip->chip.dev->of_node; + struct device_node *np = our_chip->chip.dev->of_node; const struct of_device_id *match; struct property *prop; const __be32 *cur; @@ -529,22 +519,22 @@ static int pwm_samsung_parse_dt(struct samsung_pwm_chip *chip) if (!match) return -ENODEV; - memcpy(&chip->variant, match->data, sizeof(chip->variant)); + memcpy(&our_chip->variant, match->data, sizeof(our_chip->variant)); of_property_for_each_u32(np, "samsung,pwm-outputs", prop, cur, val) { if (val >= SAMSUNG_PWM_NUM) { - dev_err(chip->chip.dev, + dev_err(our_chip->chip.dev, "%s: invalid channel index in samsung,pwm-outputs property\n", __func__); continue; } - chip->variant.output_mask |= BIT(val); + our_chip->variant.output_mask |= BIT(val); } return 0; } #else -static int pwm_samsung_parse_dt(struct samsung_pwm_chip *chip) +static int pwm_samsung_parse_dt(struct samsung_pwm_chip *our_chip) { return -ENODEV; } @@ -553,21 +543,21 @@ static int pwm_samsung_parse_dt(struct samsung_pwm_chip *chip) static int pwm_samsung_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct samsung_pwm_chip *chip; + struct samsung_pwm_chip *our_chip; unsigned int chan; int ret; - chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL); - if (chip == NULL) + our_chip = devm_kzalloc(&pdev->dev, sizeof(*our_chip), GFP_KERNEL); + if (our_chip == NULL) return -ENOMEM; - chip->chip.dev = &pdev->dev; - chip->chip.ops = &pwm_samsung_ops; - chip->chip.npwm = SAMSUNG_PWM_NUM; - chip->inverter_mask = BIT(SAMSUNG_PWM_NUM) - 1; + our_chip->chip.dev = &pdev->dev; + our_chip->chip.ops = &pwm_samsung_ops; + our_chip->chip.npwm = SAMSUNG_PWM_NUM; + our_chip->inverter_mask = BIT(SAMSUNG_PWM_NUM) - 1; if (IS_ENABLED(CONFIG_OF) && pdev->dev.of_node) { - ret = pwm_samsung_parse_dt(chip); + ret = pwm_samsung_parse_dt(our_chip); if (ret) return ret; } else { @@ -576,58 +566,58 @@ static int pwm_samsung_probe(struct platform_device *pdev) return -EINVAL; } - memcpy(&chip->variant, pdev->dev.platform_data, - sizeof(chip->variant)); + memcpy(&our_chip->variant, pdev->dev.platform_data, + sizeof(our_chip->variant)); } - chip->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(chip->base)) - return PTR_ERR(chip->base); + our_chip->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(our_chip->base)) + return PTR_ERR(our_chip->base); - chip->base_clk = devm_clk_get(&pdev->dev, "timers"); - if (IS_ERR(chip->base_clk)) { + our_chip->base_clk = devm_clk_get(&pdev->dev, "timers"); + if (IS_ERR(our_chip->base_clk)) { dev_err(dev, "failed to get timer base clk\n"); - return PTR_ERR(chip->base_clk); + return PTR_ERR(our_chip->base_clk); } - ret = clk_prepare_enable(chip->base_clk); + ret = clk_prepare_enable(our_chip->base_clk); if (ret < 0) { dev_err(dev, "failed to enable base clock\n"); return ret; } for (chan = 0; chan < SAMSUNG_PWM_NUM; ++chan) - 
if (chip->variant.output_mask & BIT(chan)) - pwm_samsung_set_invert(chip, chan, true); + if (our_chip->variant.output_mask & BIT(chan)) + pwm_samsung_set_invert(our_chip, chan, true); /* Following clocks are optional. */ - chip->tclk0 = devm_clk_get(&pdev->dev, "pwm-tclk0"); - chip->tclk1 = devm_clk_get(&pdev->dev, "pwm-tclk1"); + our_chip->tclk0 = devm_clk_get(&pdev->dev, "pwm-tclk0"); + our_chip->tclk1 = devm_clk_get(&pdev->dev, "pwm-tclk1"); - platform_set_drvdata(pdev, chip); + platform_set_drvdata(pdev, our_chip); - ret = pwmchip_add(&chip->chip); + ret = pwmchip_add(&our_chip->chip); if (ret < 0) { dev_err(dev, "failed to register PWM chip\n"); - clk_disable_unprepare(chip->base_clk); + clk_disable_unprepare(our_chip->base_clk); return ret; } dev_dbg(dev, "base_clk at %lu, tclk0 at %lu, tclk1 at %lu\n", - clk_get_rate(chip->base_clk), - !IS_ERR(chip->tclk0) ? clk_get_rate(chip->tclk0) : 0, - !IS_ERR(chip->tclk1) ? clk_get_rate(chip->tclk1) : 0); + clk_get_rate(our_chip->base_clk), + !IS_ERR(our_chip->tclk0) ? clk_get_rate(our_chip->tclk0) : 0, + !IS_ERR(our_chip->tclk1) ? clk_get_rate(our_chip->tclk1) : 0); return 0; } static void pwm_samsung_remove(struct platform_device *pdev) { - struct samsung_pwm_chip *chip = platform_get_drvdata(pdev); + struct samsung_pwm_chip *our_chip = platform_get_drvdata(pdev); - pwmchip_remove(&chip->chip); + pwmchip_remove(&our_chip->chip); - clk_disable_unprepare(chip->base_clk); + clk_disable_unprepare(our_chip->base_clk); } #ifdef CONFIG_PM_SLEEP @@ -639,9 +629,9 @@ static int pwm_samsung_resume(struct device *dev) for (i = 0; i < SAMSUNG_PWM_NUM; i++) { struct pwm_device *pwm = &chip->pwms[i]; - struct samsung_pwm_channel *chan = pwm_get_chip_data(pwm); + struct samsung_pwm_channel *chan = &our_chip->channel[i]; - if (!chan) + if (!test_bit(PWMF_REQUESTED, &pwm->flags)) continue; if (our_chip->variant.output_mask & BIT(i)) diff --git a/drivers/pwm/pwm-sifive.c b/drivers/pwm/pwm-sifive.c index eabddb7c7820..089e50bdbbf0 100644 --- a/drivers/pwm/pwm-sifive.c +++ b/drivers/pwm/pwm-sifive.c @@ -203,7 +203,6 @@ static const struct pwm_ops pwm_sifive_ops = { .free = pwm_sifive_free, .get_state = pwm_sifive_get_state, .apply = pwm_sifive_apply, - .owner = THIS_MODULE, }; static int pwm_sifive_clock_notifier(struct notifier_block *nb, diff --git a/drivers/pwm/pwm-sl28cpld.c b/drivers/pwm/pwm-sl28cpld.c index 9e42e3a74ad6..88b01ff9e460 100644 --- a/drivers/pwm/pwm-sl28cpld.c +++ b/drivers/pwm/pwm-sl28cpld.c @@ -200,7 +200,6 @@ static int sl28cpld_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops sl28cpld_pwm_ops = { .apply = sl28cpld_pwm_apply, .get_state = sl28cpld_pwm_get_state, - .owner = THIS_MODULE, }; static int sl28cpld_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-spear.c b/drivers/pwm/pwm-spear.c index 4e1cfd8d7c03..ff991319feef 100644 --- a/drivers/pwm/pwm-spear.c +++ b/drivers/pwm/pwm-spear.c @@ -189,7 +189,6 @@ static int spear_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops spear_pwm_ops = { .apply = spear_pwm_apply, - .owner = THIS_MODULE, }; static int spear_pwm_probe(struct platform_device *pdev) @@ -207,26 +206,21 @@ static int spear_pwm_probe(struct platform_device *pdev) if (IS_ERR(pc->mmio_base)) return PTR_ERR(pc->mmio_base); - pc->clk = devm_clk_get(&pdev->dev, NULL); + pc->clk = devm_clk_get_prepared(&pdev->dev, NULL); if (IS_ERR(pc->clk)) - return PTR_ERR(pc->clk); - - platform_set_drvdata(pdev, pc); + return dev_err_probe(&pdev->dev, 
PTR_ERR(pc->clk), + "Failed to get clock\n"); pc->chip.dev = &pdev->dev; pc->chip.ops = &spear_pwm_ops; pc->chip.npwm = NUM_PWM; - ret = clk_prepare(pc->clk); - if (ret) - return ret; - if (of_device_is_compatible(np, "st,spear1340-pwm")) { ret = clk_enable(pc->clk); - if (ret) { - clk_unprepare(pc->clk); - return ret; - } + if (ret) + return dev_err_probe(&pdev->dev, ret, + "Failed to enable clk\n"); + /* * Following enables PWM chip, channels would still be * enabled individually through their control register @@ -238,23 +232,11 @@ static int spear_pwm_probe(struct platform_device *pdev) clk_disable(pc->clk); } - ret = pwmchip_add(&pc->chip); - if (ret < 0) { - clk_unprepare(pc->clk); - dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret); - } + ret = devm_pwmchip_add(&pdev->dev, &pc->chip); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, "pwmchip_add() failed\n"); - return ret; -} - -static void spear_pwm_remove(struct platform_device *pdev) -{ - struct spear_pwm_chip *pc = platform_get_drvdata(pdev); - - pwmchip_remove(&pc->chip); - - /* clk was prepared in probe, hence unprepare it here */ - clk_unprepare(pc->clk); + return 0; } static const struct of_device_id spear_pwm_of_match[] = { @@ -271,7 +253,6 @@ static struct platform_driver spear_pwm_driver = { .of_match_table = spear_pwm_of_match, }, .probe = spear_pwm_probe, - .remove_new = spear_pwm_remove, }; module_platform_driver(spear_pwm_driver); diff --git a/drivers/pwm/pwm-sprd.c b/drivers/pwm/pwm-sprd.c index 1499c8c1fe37..77939e161006 100644 --- a/drivers/pwm/pwm-sprd.c +++ b/drivers/pwm/pwm-sprd.c @@ -40,6 +40,11 @@ struct sprd_pwm_chip { struct sprd_pwm_chn chn[SPRD_PWM_CHN_NUM]; }; +static inline struct sprd_pwm_chip *sprd_pwm_from_chip(struct pwm_chip *chip) +{ + return container_of(chip, struct sprd_pwm_chip, chip); +} + /* * The list of clocks required by PWM channels, and each channel has 2 clocks: * enable clock and pwm clock.
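The sprd_pwm_from_chip() helper added above is the usual container_of() accessor: the driver embeds the generic struct pwm_chip inside its private structure, and every callback recovers the private structure from the chip pointer the PWM core hands it. The same idiom is what lets other drivers in this series (pwm-renesas-tpu, pwm-samsung, pwm-sti) replace pwm_get_chip_data() with per-channel arrays indexed by pwm->hwpwm inside the private structure. A minimal sketch of the pattern, as standalone C outside the kernel tree — the foo_pwm names are hypothetical and the container_of() definition below is a simplified stand-in for the kernel's type-checked macro:

#include <stddef.h>
#include <stdio.h>

/* Stand-in for the generic chip type that callbacks receive. */
struct pwm_chip {
	int npwm;
};

/* Driver-private state embedding the generic chip. */
struct foo_pwm_chip {
	unsigned int clk_rate;		/* private field, for illustration */
	struct pwm_chip chip;		/* embedded generic object */
};

/* Recover the enclosing structure from a pointer to its member. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static inline struct foo_pwm_chip *foo_pwm_from_chip(struct pwm_chip *chip)
{
	return container_of(chip, struct foo_pwm_chip, chip);
}

int main(void)
{
	struct foo_pwm_chip foo = { .clk_rate = 26000000, .chip = { .npwm = 4 } };
	struct pwm_chip *chip = &foo.chip;	/* what a callback would be given */

	/* Prints 26000000: the accessor walked back to the wrapper. */
	printf("%u\n", foo_pwm_from_chip(chip)->clk_rate);
	return 0;
}

The accessor compiles down to a constant offset subtraction, so it costs nothing at run time. The .owner = THIS_MODULE removals seen throughout these PWM diffs appear to rest on a companion core-side change that records the owning module when the chip is registered, which is why none of the converted drivers need to set the field by hand any longer.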
@@ -69,8 +74,7 @@ static void sprd_pwm_write(struct sprd_pwm_chip *spc, u32 hwid, static int sprd_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state) { - struct sprd_pwm_chip *spc = - container_of(chip, struct sprd_pwm_chip, chip); + struct sprd_pwm_chip *spc = sprd_pwm_from_chip(chip); struct sprd_pwm_chn *chn = &spc->chn[pwm->hwpwm]; u32 val, duty, prescale; u64 tmp; @@ -162,8 +166,7 @@ static int sprd_pwm_config(struct sprd_pwm_chip *spc, struct pwm_device *pwm, static int sprd_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, const struct pwm_state *state) { - struct sprd_pwm_chip *spc = - container_of(chip, struct sprd_pwm_chip, chip); + struct sprd_pwm_chip *spc = sprd_pwm_from_chip(chip); struct sprd_pwm_chn *chn = &spc->chn[pwm->hwpwm]; struct pwm_state *cstate = &pwm->state; int ret; @@ -210,7 +213,6 @@ static int sprd_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops sprd_pwm_ops = { .apply = sprd_pwm_apply, .get_state = sprd_pwm_get_state, - .owner = THIS_MODULE, }; static int sprd_pwm_clk_init(struct sprd_pwm_chip *spc) @@ -240,10 +242,8 @@ static int sprd_pwm_clk_init(struct sprd_pwm_chip *spc) chn->clk_rate = clk_get_rate(clk_pwm); } - if (!i) { - dev_err(spc->dev, "no available PWM channels\n"); - return -ENODEV; - } + if (!i) + return dev_err_probe(spc->dev, -ENODEV, "no available PWM channels\n"); spc->num_pwms = i; @@ -264,7 +264,6 @@ static int sprd_pwm_probe(struct platform_device *pdev) return PTR_ERR(spc->base); spc->dev = &pdev->dev; - platform_set_drvdata(pdev, spc); ret = sprd_pwm_clk_init(spc); if (ret) @@ -274,20 +273,13 @@ static int sprd_pwm_probe(struct platform_device *pdev) spc->chip.ops = &sprd_pwm_ops; spc->chip.npwm = spc->num_pwms; - ret = pwmchip_add(&spc->chip); + ret = devm_pwmchip_add(&pdev->dev, &spc->chip); if (ret) dev_err(&pdev->dev, "failed to add PWM chip\n"); return ret; } -static void sprd_pwm_remove(struct platform_device *pdev) -{ - struct sprd_pwm_chip *spc = platform_get_drvdata(pdev); - - pwmchip_remove(&spc->chip); -} - static const struct of_device_id sprd_pwm_of_match[] = { { .compatible = "sprd,ums512-pwm", }, { }, @@ -300,7 +292,6 @@ static struct platform_driver sprd_pwm_driver = { .of_match_table = sprd_pwm_of_match, }, .probe = sprd_pwm_probe, - .remove_new = sprd_pwm_remove, }; module_platform_driver(sprd_pwm_driver); diff --git a/drivers/pwm/pwm-sti.c b/drivers/pwm/pwm-sti.c index b1d1373648a3..dc92cea31cd0 100644 --- a/drivers/pwm/pwm-sti.c +++ b/drivers/pwm/pwm-sti.c @@ -79,6 +79,7 @@ struct sti_pwm_compat_data { unsigned int cpt_num_devs; unsigned int max_pwm_cnt; unsigned int max_prescale; + struct sti_cpt_ddata *ddata; }; struct sti_pwm_chip { @@ -314,7 +315,7 @@ static int sti_pwm_capture(struct pwm_chip *chip, struct pwm_device *pwm, { struct sti_pwm_chip *pc = to_sti_pwmchip(chip); struct sti_pwm_compat_data *cdata = pc->cdata; - struct sti_cpt_ddata *ddata = pwm_get_chip_data(pwm); + struct sti_cpt_ddata *ddata = &cdata->ddata[pwm->hwpwm]; struct device *dev = pc->dev; unsigned int effective_ticks; unsigned long long high, low; @@ -420,7 +421,6 @@ static const struct pwm_ops sti_pwm_ops = { .capture = sti_pwm_capture, .apply = sti_pwm_apply, .free = sti_pwm_free, - .owner = THIS_MODULE, }; static irqreturn_t sti_pwm_interrupt(int irq, void *data) @@ -440,7 +440,7 @@ static irqreturn_t sti_pwm_interrupt(int irq, void *data) while (cpt_int_stat) { devicenum = ffs(cpt_int_stat) - 1; - ddata = pwm_get_chip_data(&pc->chip.pwms[devicenum]); + ddata = 
&pc->cdata->ddata[devicenum]; /* * Capture input: @@ -638,30 +638,28 @@ static int sti_pwm_probe(struct platform_device *pdev) dev_err(dev, "failed to prepare clock\n"); return ret; } + + cdata->ddata = devm_kzalloc(dev, cdata->cpt_num_devs * sizeof(*cdata->ddata), GFP_KERNEL); + if (!cdata->ddata) + return -ENOMEM; } pc->chip.dev = dev; pc->chip.ops = &sti_pwm_ops; pc->chip.npwm = pc->cdata->pwm_num_devs; - ret = pwmchip_add(&pc->chip); - if (ret < 0) { - clk_unprepare(pc->pwm_clk); - clk_unprepare(pc->cpt_clk); - return ret; - } - for (i = 0; i < cdata->cpt_num_devs; i++) { - struct sti_cpt_ddata *ddata; - - ddata = devm_kzalloc(dev, sizeof(*ddata), GFP_KERNEL); - if (!ddata) - return -ENOMEM; + struct sti_cpt_ddata *ddata = &cdata->ddata[i]; init_waitqueue_head(&ddata->wait); mutex_init(&ddata->lock); + } - pwm_set_chip_data(&pc->chip.pwms[i], ddata); + ret = pwmchip_add(&pc->chip); + if (ret < 0) { + clk_unprepare(pc->pwm_clk); + clk_unprepare(pc->cpt_clk); + return ret; } platform_set_drvdata(pdev, pc); diff --git a/drivers/pwm/pwm-stm32-lp.c b/drivers/pwm/pwm-stm32-lp.c index bb3a045a7334..b67974cc1872 100644 --- a/drivers/pwm/pwm-stm32-lp.c +++ b/drivers/pwm/pwm-stm32-lp.c @@ -189,7 +189,6 @@ static int stm32_pwm_lp_get_state(struct pwm_chip *chip, } static const struct pwm_ops stm32_pwm_lp_ops = { - .owner = THIS_MODULE, .apply = stm32_pwm_lp_apply, .get_state = stm32_pwm_lp_get_state, }; diff --git a/drivers/pwm/pwm-stm32.c b/drivers/pwm/pwm-stm32.c index 3d6be7749e23..3303a754ea02 100644 --- a/drivers/pwm/pwm-stm32.c +++ b/drivers/pwm/pwm-stm32.c @@ -487,7 +487,6 @@ static int stm32_pwm_apply_locked(struct pwm_chip *chip, struct pwm_device *pwm, } static const struct pwm_ops stm32pwm_ops = { - .owner = THIS_MODULE, .apply = stm32_pwm_apply_locked, .capture = IS_ENABLED(CONFIG_DMA_ENGINE) ? 
stm32_pwm_capture : NULL, }; diff --git a/drivers/pwm/pwm-stmpe.c b/drivers/pwm/pwm-stmpe.c index e205405c4828..a46f5b4dd816 100644 --- a/drivers/pwm/pwm-stmpe.c +++ b/drivers/pwm/pwm-stmpe.c @@ -287,7 +287,6 @@ static int stmpe_24xx_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops stmpe_24xx_pwm_ops = { .apply = stmpe_24xx_pwm_apply, - .owner = THIS_MODULE, }; static int __init stmpe_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-sun4i.c b/drivers/pwm/pwm-sun4i.c index c84fcf1a13dc..1a439025540d 100644 --- a/drivers/pwm/pwm-sun4i.c +++ b/drivers/pwm/pwm-sun4i.c @@ -325,7 +325,6 @@ static int sun4i_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops sun4i_pwm_ops = { .apply = sun4i_pwm_apply, .get_state = sun4i_pwm_get_state, - .owner = THIS_MODULE, }; static const struct sun4i_pwm_data sun4i_pwm_dual_nobypass = { diff --git a/drivers/pwm/pwm-sunplus.c b/drivers/pwm/pwm-sunplus.c index 7705c7b86c3a..773e2f80526e 100644 --- a/drivers/pwm/pwm-sunplus.c +++ b/drivers/pwm/pwm-sunplus.c @@ -163,7 +163,6 @@ static int sunplus_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops sunplus_pwm_ops = { .apply = sunplus_pwm_apply, .get_state = sunplus_pwm_get_state, - .owner = THIS_MODULE, }; static void sunplus_pwm_clk_release(void *data) diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c index a169a34e0778..39ea51e08c94 100644 --- a/drivers/pwm/pwm-tegra.c +++ b/drivers/pwm/pwm-tegra.c @@ -268,7 +268,6 @@ static int tegra_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops tegra_pwm_ops = { .apply = tegra_pwm_apply, - .owner = THIS_MODULE, }; static int tegra_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-tiecap.c b/drivers/pwm/pwm-tiecap.c index 8c94b266c1b2..11e3549cf103 100644 --- a/drivers/pwm/pwm-tiecap.c +++ b/drivers/pwm/pwm-tiecap.c @@ -205,7 +205,6 @@ static int ecap_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops ecap_pwm_ops = { .apply = ecap_pwm_apply, - .owner = THIS_MODULE, }; static const struct of_device_id ecap_of_match[] = { diff --git a/drivers/pwm/pwm-tiehrpwm.c b/drivers/pwm/pwm-tiehrpwm.c index ecbfd7e954ec..66ac2655845f 100644 --- a/drivers/pwm/pwm-tiehrpwm.c +++ b/drivers/pwm/pwm-tiehrpwm.c @@ -437,7 +437,6 @@ static int ehrpwm_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops ehrpwm_pwm_ops = { .free = ehrpwm_pwm_free, .apply = ehrpwm_pwm_apply, - .owner = THIS_MODULE, }; static const struct of_device_id ehrpwm_of_match[] = { diff --git a/drivers/pwm/pwm-twl-led.c b/drivers/pwm/pwm-twl-led.c index 8fb84b441853..625233f4703a 100644 --- a/drivers/pwm/pwm-twl-led.c +++ b/drivers/pwm/pwm-twl-led.c @@ -189,7 +189,6 @@ static int twl4030_pwmled_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops twl4030_pwmled_ops = { .apply = twl4030_pwmled_apply, - .owner = THIS_MODULE, }; static int twl6030_pwmled_config(struct pwm_chip *chip, struct pwm_device *pwm, @@ -342,7 +341,6 @@ static const struct pwm_ops twl6030_pwmled_ops = { .apply = twl6030_pwmled_apply, .request = twl6030_pwmled_request, .free = twl6030_pwmled_free, - .owner = THIS_MODULE, }; static int twl_pwmled_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-twl.c b/drivers/pwm/pwm-twl.c index 86567add79db..603d31f27470 100644 --- a/drivers/pwm/pwm-twl.c +++ b/drivers/pwm/pwm-twl.c @@ -333,12 +333,10 @@ static const 
struct pwm_ops twl4030_pwm_ops = { .apply = twl4030_pwm_apply, .request = twl4030_pwm_request, .free = twl4030_pwm_free, - .owner = THIS_MODULE, }; static const struct pwm_ops twl6030_pwm_ops = { .apply = twl6030_pwm_apply, - .owner = THIS_MODULE, }; static int twl_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-visconti.c b/drivers/pwm/pwm-visconti.c index 7f7591a2384c..8d736d558122 100644 --- a/drivers/pwm/pwm-visconti.c +++ b/drivers/pwm/pwm-visconti.c @@ -129,7 +129,6 @@ static int visconti_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops visconti_pwm_ops = { .apply = visconti_pwm_apply, .get_state = visconti_pwm_get_state, - .owner = THIS_MODULE, }; static int visconti_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-vt8500.c b/drivers/pwm/pwm-vt8500.c index 6d46db51daac..5568d5312d3c 100644 --- a/drivers/pwm/pwm-vt8500.c +++ b/drivers/pwm/pwm-vt8500.c @@ -221,7 +221,6 @@ static int vt8500_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops vt8500_pwm_ops = { .apply = vt8500_pwm_apply, - .owner = THIS_MODULE, }; static const struct of_device_id vt8500_pwm_dt_ids[] = { @@ -236,10 +235,8 @@ static int vt8500_pwm_probe(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node; int ret; - if (!np) { - dev_err(&pdev->dev, "invalid devicetree node\n"); - return -EINVAL; - } + if (!np) + return dev_err_probe(&pdev->dev, -EINVAL, "invalid devicetree node\n"); vt8500 = devm_kzalloc(&pdev->dev, sizeof(*vt8500), GFP_KERNEL); if (vt8500 == NULL) @@ -249,45 +246,23 @@ static int vt8500_pwm_probe(struct platform_device *pdev) vt8500->chip.ops = &vt8500_pwm_ops; vt8500->chip.npwm = VT8500_NR_PWMS; - vt8500->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(vt8500->clk)) { - dev_err(&pdev->dev, "clock source not specified\n"); - return PTR_ERR(vt8500->clk); - } + vt8500->clk = devm_clk_get_prepared(&pdev->dev, NULL); + if (IS_ERR(vt8500->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(vt8500->clk), "clock source not specified\n"); vt8500->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(vt8500->base)) return PTR_ERR(vt8500->base); - ret = clk_prepare(vt8500->clk); - if (ret < 0) { - dev_err(&pdev->dev, "failed to prepare clock\n"); - return ret; - } - - ret = pwmchip_add(&vt8500->chip); - if (ret < 0) { - dev_err(&pdev->dev, "failed to add PWM chip\n"); - clk_unprepare(vt8500->clk); - return ret; - } - - platform_set_drvdata(pdev, vt8500); - return ret; -} - -static void vt8500_pwm_remove(struct platform_device *pdev) -{ - struct vt8500_chip *vt8500 = platform_get_drvdata(pdev); - - pwmchip_remove(&vt8500->chip); + ret = devm_pwmchip_add(&pdev->dev, &vt8500->chip); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n"); - clk_unprepare(vt8500->clk); + return 0; } static struct platform_driver vt8500_pwm_driver = { .probe = vt8500_pwm_probe, - .remove_new = vt8500_pwm_remove, .driver = { .name = "vt8500-pwm", .of_match_table = vt8500_pwm_dt_ids, diff --git a/drivers/pwm/pwm-xilinx.c b/drivers/pwm/pwm-xilinx.c index 85153ee90809..5f3c2a6fed11 100644 --- a/drivers/pwm/pwm-xilinx.c +++ b/drivers/pwm/pwm-xilinx.c @@ -198,7 +198,6 @@ static int xilinx_pwm_get_state(struct pwm_chip *chip, static const struct pwm_ops xilinx_pwm_ops = { .apply = xilinx_pwm_apply, .get_state = xilinx_pwm_get_state, - .owner = THIS_MODULE, }; static const struct regmap_config xilinx_pwm_regmap_config = { diff --git a/drivers/remoteproc/mtk_common.h 
b/drivers/remoteproc/mtk_common.h index ea6fa1100a00..6d7736a031f7 100644 --- a/drivers/remoteproc/mtk_common.h +++ b/drivers/remoteproc/mtk_common.h @@ -47,6 +47,7 @@ #define MT8192_SCP2SPM_IPC_CLR 0x4094 #define MT8192_GIPC_IN_SET 0x4098 #define MT8192_HOST_IPC_INT_BIT BIT(0) +#define MT8195_CORE1_HOST_IPC_INT_BIT BIT(4) #define MT8192_CORE0_SW_RSTN_CLR 0x10000 #define MT8192_CORE0_SW_RSTN_SET 0x10004 @@ -54,8 +55,28 @@ #define MT8192_CORE0_WDT_IRQ 0x10030 #define MT8192_CORE0_WDT_CFG 0x10034 +#define MT8195_SYS_STATUS 0x4004 +#define MT8195_CORE0_WDT BIT(16) +#define MT8195_CORE1_WDT BIT(17) + #define MT8195_L1TCM_SRAM_PDN_RESERVED_RSI_BITS GENMASK(7, 4) +#define MT8195_CPU1_SRAM_PD 0x1084 +#define MT8195_SSHUB2APMCU_IPC_SET 0x4088 +#define MT8195_SSHUB2APMCU_IPC_CLR 0x408C +#define MT8195_CORE1_SW_RSTN_CLR 0x20000 +#define MT8195_CORE1_SW_RSTN_SET 0x20004 +#define MT8195_CORE1_MEM_ATT_PREDEF 0x20008 +#define MT8195_CORE1_WDT_IRQ 0x20030 +#define MT8195_CORE1_WDT_CFG 0x20034 + +#define MT8195_SEC_CTRL 0x85000 +#define MT8195_CORE_OFFSET_ENABLE_D BIT(13) +#define MT8195_CORE_OFFSET_ENABLE_I BIT(12) +#define MT8195_L2TCM_OFFSET_RANGE_0_LOW 0x850b0 +#define MT8195_L2TCM_OFFSET_RANGE_0_HIGH 0x850b4 +#define MT8195_L2TCM_OFFSET 0x850d0 + #define SCP_FW_VER_LEN 32 #define SCP_SHARE_BUFFER_SIZE 288 @@ -91,17 +112,24 @@ struct mtk_scp_of_data { size_t ipi_buf_offset; }; +struct mtk_scp_of_cluster { + void __iomem *reg_base; + void __iomem *l1tcm_base; + size_t l1tcm_size; + phys_addr_t l1tcm_phys; + struct list_head mtk_scp_list; + /* Prevent concurrent operations of this structure and L2TCM power control. */ + struct mutex cluster_lock; + u32 l2tcm_refcnt; +}; + struct mtk_scp { struct device *dev; struct rproc *rproc; struct clk *clk; - void __iomem *reg_base; void __iomem *sram_base; size_t sram_size; phys_addr_t sram_phys; - void __iomem *l1tcm_base; - size_t l1tcm_size; - phys_addr_t l1tcm_phys; const struct mtk_scp_of_data *data; @@ -119,6 +147,9 @@ struct mtk_scp { size_t dram_size; struct rproc_subdev *rpmsg_subdev; + + struct list_head elem; + struct mtk_scp_of_cluster *cluster; }; /** diff --git a/drivers/remoteproc/mtk_scp.c b/drivers/remoteproc/mtk_scp.c index dcc94ee2458d..a35409eda0cf 100644 --- a/drivers/remoteproc/mtk_scp.c +++ b/drivers/remoteproc/mtk_scp.c @@ -68,8 +68,14 @@ EXPORT_SYMBOL_GPL(scp_put); static void scp_wdt_handler(struct mtk_scp *scp, u32 scp_to_host) { + struct mtk_scp_of_cluster *scp_cluster = scp->cluster; + struct mtk_scp *scp_node; + dev_err(scp->dev, "SCP watchdog timeout! 
0x%x", scp_to_host); - rproc_report_crash(scp->rproc, RPROC_WATCHDOG); + + /* report watchdog timeout to all cores */ + list_for_each_entry(scp_node, &scp_cluster->mtk_scp_list, elem) + rproc_report_crash(scp_node->rproc, RPROC_WATCHDOG); } static void scp_init_ipi_handler(void *data, unsigned int len, void *priv) @@ -106,7 +112,7 @@ static void scp_ipi_handler(struct mtk_scp *scp) scp_ipi_lock(scp, id); handler = ipi_desc[id].handler; if (!handler) { - dev_err(scp->dev, "No such ipi id = %d\n", id); + dev_err(scp->dev, "No handler for ipi id = %d\n", id); scp_ipi_unlock(scp, id); return; } @@ -152,35 +158,45 @@ static void mt8183_scp_reset_assert(struct mtk_scp *scp) { u32 val; - val = readl(scp->reg_base + MT8183_SW_RSTN); + val = readl(scp->cluster->reg_base + MT8183_SW_RSTN); val &= ~MT8183_SW_RSTN_BIT; - writel(val, scp->reg_base + MT8183_SW_RSTN); + writel(val, scp->cluster->reg_base + MT8183_SW_RSTN); } static void mt8183_scp_reset_deassert(struct mtk_scp *scp) { u32 val; - val = readl(scp->reg_base + MT8183_SW_RSTN); + val = readl(scp->cluster->reg_base + MT8183_SW_RSTN); val |= MT8183_SW_RSTN_BIT; - writel(val, scp->reg_base + MT8183_SW_RSTN); + writel(val, scp->cluster->reg_base + MT8183_SW_RSTN); } static void mt8192_scp_reset_assert(struct mtk_scp *scp) { - writel(1, scp->reg_base + MT8192_CORE0_SW_RSTN_SET); + writel(1, scp->cluster->reg_base + MT8192_CORE0_SW_RSTN_SET); } static void mt8192_scp_reset_deassert(struct mtk_scp *scp) { - writel(1, scp->reg_base + MT8192_CORE0_SW_RSTN_CLR); + writel(1, scp->cluster->reg_base + MT8192_CORE0_SW_RSTN_CLR); +} + +static void mt8195_scp_c1_reset_assert(struct mtk_scp *scp) +{ + writel(1, scp->cluster->reg_base + MT8195_CORE1_SW_RSTN_SET); +} + +static void mt8195_scp_c1_reset_deassert(struct mtk_scp *scp) +{ + writel(1, scp->cluster->reg_base + MT8195_CORE1_SW_RSTN_CLR); } static void mt8183_scp_irq_handler(struct mtk_scp *scp) { u32 scp_to_host; - scp_to_host = readl(scp->reg_base + MT8183_SCP_TO_HOST); + scp_to_host = readl(scp->cluster->reg_base + MT8183_SCP_TO_HOST); if (scp_to_host & MT8183_SCP_IPC_INT_BIT) scp_ipi_handler(scp); else @@ -188,14 +204,14 @@ static void mt8183_scp_irq_handler(struct mtk_scp *scp) /* SCP won't send another interrupt until we set SCP_TO_HOST to 0. */ writel(MT8183_SCP_IPC_INT_BIT | MT8183_SCP_WDT_INT_BIT, - scp->reg_base + MT8183_SCP_TO_HOST); + scp->cluster->reg_base + MT8183_SCP_TO_HOST); } static void mt8192_scp_irq_handler(struct mtk_scp *scp) { u32 scp_to_host; - scp_to_host = readl(scp->reg_base + MT8192_SCP2APMCU_IPC_SET); + scp_to_host = readl(scp->cluster->reg_base + MT8192_SCP2APMCU_IPC_SET); if (scp_to_host & MT8192_SCP_IPC_INT_BIT) { scp_ipi_handler(scp); @@ -205,13 +221,48 @@ static void mt8192_scp_irq_handler(struct mtk_scp *scp) * MT8192_SCP2APMCU_IPC. 
*/ writel(MT8192_SCP_IPC_INT_BIT, - scp->reg_base + MT8192_SCP2APMCU_IPC_CLR); + scp->cluster->reg_base + MT8192_SCP2APMCU_IPC_CLR); } else { scp_wdt_handler(scp, scp_to_host); - writel(1, scp->reg_base + MT8192_CORE0_WDT_IRQ); + writel(1, scp->cluster->reg_base + MT8192_CORE0_WDT_IRQ); } } +static void mt8195_scp_irq_handler(struct mtk_scp *scp) +{ + u32 scp_to_host; + + scp_to_host = readl(scp->cluster->reg_base + MT8192_SCP2APMCU_IPC_SET); + + if (scp_to_host & MT8192_SCP_IPC_INT_BIT) { + scp_ipi_handler(scp); + } else { + u32 reason = readl(scp->cluster->reg_base + MT8195_SYS_STATUS); + + if (reason & MT8195_CORE0_WDT) + writel(1, scp->cluster->reg_base + MT8192_CORE0_WDT_IRQ); + + if (reason & MT8195_CORE1_WDT) + writel(1, scp->cluster->reg_base + MT8195_CORE1_WDT_IRQ); + + scp_wdt_handler(scp, reason); + } + + writel(scp_to_host, scp->cluster->reg_base + MT8192_SCP2APMCU_IPC_CLR); +} + +static void mt8195_scp_c1_irq_handler(struct mtk_scp *scp) +{ + u32 scp_to_host; + + scp_to_host = readl(scp->cluster->reg_base + MT8195_SSHUB2APMCU_IPC_SET); + + if (scp_to_host & MT8192_SCP_IPC_INT_BIT) + scp_ipi_handler(scp); + + writel(scp_to_host, scp->cluster->reg_base + MT8195_SSHUB2APMCU_IPC_CLR); +} + static irqreturn_t scp_irq_handler(int irq, void *priv) { struct mtk_scp *scp = priv; @@ -341,26 +392,26 @@ static int mt8195_scp_clk_get(struct mtk_scp *scp) static int mt8183_scp_before_load(struct mtk_scp *scp) { /* Clear SCP to host interrupt */ - writel(MT8183_SCP_IPC_INT_BIT, scp->reg_base + MT8183_SCP_TO_HOST); + writel(MT8183_SCP_IPC_INT_BIT, scp->cluster->reg_base + MT8183_SCP_TO_HOST); /* Reset clocks before loading FW */ - writel(0x0, scp->reg_base + MT8183_SCP_CLK_SW_SEL); - writel(0x0, scp->reg_base + MT8183_SCP_CLK_DIV_SEL); + writel(0x0, scp->cluster->reg_base + MT8183_SCP_CLK_SW_SEL); + writel(0x0, scp->cluster->reg_base + MT8183_SCP_CLK_DIV_SEL); /* Initialize TCM before loading FW. */ - writel(0x0, scp->reg_base + MT8183_SCP_L1_SRAM_PD); - writel(0x0, scp->reg_base + MT8183_SCP_TCM_TAIL_SRAM_PD); + writel(0x0, scp->cluster->reg_base + MT8183_SCP_L1_SRAM_PD); + writel(0x0, scp->cluster->reg_base + MT8183_SCP_TCM_TAIL_SRAM_PD); /* Turn on the power of SCP's SRAM before using it. */ - writel(0x0, scp->reg_base + MT8183_SCP_SRAM_PDN); + writel(0x0, scp->cluster->reg_base + MT8183_SCP_SRAM_PDN); /* * Set I-cache and D-cache size before loading SCP FW. * SCP SRAM logical address may change when cache size setting differs. */ writel(MT8183_SCP_CACHE_CON_WAYEN | MT8183_SCP_CACHESIZE_8KB, - scp->reg_base + MT8183_SCP_CACHE_CON); - writel(MT8183_SCP_CACHESIZE_8KB, scp->reg_base + MT8183_SCP_DCACHE_CON); + scp->cluster->reg_base + MT8183_SCP_CACHE_CON); + writel(MT8183_SCP_CACHESIZE_8KB, scp->cluster->reg_base + MT8183_SCP_DCACHE_CON); return 0; } @@ -386,28 +437,28 @@ static void scp_sram_power_off(void __iomem *addr, u32 reserved_mask) static int mt8186_scp_before_load(struct mtk_scp *scp) { /* Clear SCP to host interrupt */ - writel(MT8183_SCP_IPC_INT_BIT, scp->reg_base + MT8183_SCP_TO_HOST); + writel(MT8183_SCP_IPC_INT_BIT, scp->cluster->reg_base + MT8183_SCP_TO_HOST); /* Reset clocks before loading FW */ - writel(0x0, scp->reg_base + MT8183_SCP_CLK_SW_SEL); - writel(0x0, scp->reg_base + MT8183_SCP_CLK_DIV_SEL); + writel(0x0, scp->cluster->reg_base + MT8183_SCP_CLK_SW_SEL); + writel(0x0, scp->cluster->reg_base + MT8183_SCP_CLK_DIV_SEL); /* Turn on the power of SCP's SRAM before using it. 
Enable 1 block per time*/ - scp_sram_power_on(scp->reg_base + MT8183_SCP_SRAM_PDN, 0); + scp_sram_power_on(scp->cluster->reg_base + MT8183_SCP_SRAM_PDN, 0); /* Initialize TCM before loading FW. */ - writel(0x0, scp->reg_base + MT8183_SCP_L1_SRAM_PD); - writel(0x0, scp->reg_base + MT8183_SCP_TCM_TAIL_SRAM_PD); - writel(0x0, scp->reg_base + MT8186_SCP_L1_SRAM_PD_P1); - writel(0x0, scp->reg_base + MT8186_SCP_L1_SRAM_PD_p2); + writel(0x0, scp->cluster->reg_base + MT8183_SCP_L1_SRAM_PD); + writel(0x0, scp->cluster->reg_base + MT8183_SCP_TCM_TAIL_SRAM_PD); + writel(0x0, scp->cluster->reg_base + MT8186_SCP_L1_SRAM_PD_P1); + writel(0x0, scp->cluster->reg_base + MT8186_SCP_L1_SRAM_PD_p2); /* * Set I-cache and D-cache size before loading SCP FW. * SCP SRAM logical address may change when cache size setting differs. */ writel(MT8183_SCP_CACHE_CON_WAYEN | MT8183_SCP_CACHESIZE_8KB, - scp->reg_base + MT8183_SCP_CACHE_CON); - writel(MT8183_SCP_CACHESIZE_8KB, scp->reg_base + MT8183_SCP_DCACHE_CON); + scp->cluster->reg_base + MT8183_SCP_CACHE_CON); + writel(MT8183_SCP_CACHESIZE_8KB, scp->cluster->reg_base + MT8183_SCP_DCACHE_CON); return 0; } @@ -415,40 +466,100 @@ static int mt8186_scp_before_load(struct mtk_scp *scp) static int mt8192_scp_before_load(struct mtk_scp *scp) { /* clear SPM interrupt, SCP2SPM_IPC_CLR */ - writel(0xff, scp->reg_base + MT8192_SCP2SPM_IPC_CLR); + writel(0xff, scp->cluster->reg_base + MT8192_SCP2SPM_IPC_CLR); - writel(1, scp->reg_base + MT8192_CORE0_SW_RSTN_SET); + writel(1, scp->cluster->reg_base + MT8192_CORE0_SW_RSTN_SET); /* enable SRAM clock */ - scp_sram_power_on(scp->reg_base + MT8192_L2TCM_SRAM_PD_0, 0); - scp_sram_power_on(scp->reg_base + MT8192_L2TCM_SRAM_PD_1, 0); - scp_sram_power_on(scp->reg_base + MT8192_L2TCM_SRAM_PD_2, 0); - scp_sram_power_on(scp->reg_base + MT8192_L1TCM_SRAM_PDN, 0); - scp_sram_power_on(scp->reg_base + MT8192_CPU0_SRAM_PD, 0); + scp_sram_power_on(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_0, 0); + scp_sram_power_on(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_1, 0); + scp_sram_power_on(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_2, 0); + scp_sram_power_on(scp->cluster->reg_base + MT8192_L1TCM_SRAM_PDN, 0); + scp_sram_power_on(scp->cluster->reg_base + MT8192_CPU0_SRAM_PD, 0); /* enable MPU for all memory regions */ - writel(0xff, scp->reg_base + MT8192_CORE0_MEM_ATT_PREDEF); + writel(0xff, scp->cluster->reg_base + MT8192_CORE0_MEM_ATT_PREDEF); + + return 0; +} + +static int mt8195_scp_l2tcm_on(struct mtk_scp *scp) +{ + struct mtk_scp_of_cluster *scp_cluster = scp->cluster; + + mutex_lock(&scp_cluster->cluster_lock); + + if (scp_cluster->l2tcm_refcnt == 0) { + /* clear SPM interrupt, SCP2SPM_IPC_CLR */ + writel(0xff, scp->cluster->reg_base + MT8192_SCP2SPM_IPC_CLR); + + /* Power on L2TCM */ + scp_sram_power_on(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_0, 0); + scp_sram_power_on(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_1, 0); + scp_sram_power_on(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_2, 0); + scp_sram_power_on(scp->cluster->reg_base + MT8192_L1TCM_SRAM_PDN, + MT8195_L1TCM_SRAM_PDN_RESERVED_RSI_BITS); + } + + scp_cluster->l2tcm_refcnt += 1; + + mutex_unlock(&scp_cluster->cluster_lock); return 0; } static int mt8195_scp_before_load(struct mtk_scp *scp) { - /* clear SPM interrupt, SCP2SPM_IPC_CLR */ - writel(0xff, scp->reg_base + MT8192_SCP2SPM_IPC_CLR); + writel(1, scp->cluster->reg_base + MT8192_CORE0_SW_RSTN_SET); - writel(1, scp->reg_base + MT8192_CORE0_SW_RSTN_SET); + mt8195_scp_l2tcm_on(scp); - /* enable SRAM clock */ - 
scp_sram_power_on(scp->reg_base + MT8192_L2TCM_SRAM_PD_0, 0); - scp_sram_power_on(scp->reg_base + MT8192_L2TCM_SRAM_PD_1, 0); - scp_sram_power_on(scp->reg_base + MT8192_L2TCM_SRAM_PD_2, 0); - scp_sram_power_on(scp->reg_base + MT8192_L1TCM_SRAM_PDN, - MT8195_L1TCM_SRAM_PDN_RESERVED_RSI_BITS); - scp_sram_power_on(scp->reg_base + MT8192_CPU0_SRAM_PD, 0); + scp_sram_power_on(scp->cluster->reg_base + MT8192_CPU0_SRAM_PD, 0); /* enable MPU for all memory regions */ - writel(0xff, scp->reg_base + MT8192_CORE0_MEM_ATT_PREDEF); + writel(0xff, scp->cluster->reg_base + MT8192_CORE0_MEM_ATT_PREDEF); + + return 0; +} + +static int mt8195_scp_c1_before_load(struct mtk_scp *scp) +{ + u32 sec_ctrl; + struct mtk_scp *scp_c0; + struct mtk_scp_of_cluster *scp_cluster = scp->cluster; + + scp->data->scp_reset_assert(scp); + + mt8195_scp_l2tcm_on(scp); + + scp_sram_power_on(scp->cluster->reg_base + MT8195_CPU1_SRAM_PD, 0); + + /* enable MPU for all memory regions */ + writel(0xff, scp->cluster->reg_base + MT8195_CORE1_MEM_ATT_PREDEF); + + /* + * The L2TCM_OFFSET_RANGE and L2TCM_OFFSET shift the destination address + * on SRAM when SCP core 1 accesses SRAM. + * + * This configuration solves booting SCP core 0 and core 1 from + * different SRAM addresses because core 0 and core 1 both boot from + * the head of SRAM by default. This must be configured before booting SCP core 1. + * + * The value of L2TCM_OFFSET_RANGE is from the viewpoint of SCP core 1. + * When SCP core 1 issues an address within the range (L2TCM_OFFSET_RANGE), + * the address will be added with a fixed offset (L2TCM_OFFSET) on the bus. + * The shift action is transparent to software. + */ + writel(0, scp->cluster->reg_base + MT8195_L2TCM_OFFSET_RANGE_0_LOW); + writel(scp->sram_size, scp->cluster->reg_base + MT8195_L2TCM_OFFSET_RANGE_0_HIGH); + + scp_c0 = list_first_entry(&scp_cluster->mtk_scp_list, struct mtk_scp, elem); + writel(scp->sram_phys - scp_c0->sram_phys, scp->cluster->reg_base + MT8195_L2TCM_OFFSET); + + /* enable SRAM offset when fetching instruction and data */ + sec_ctrl = readl(scp->cluster->reg_base + MT8195_SEC_CTRL); + sec_ctrl |= MT8195_CORE_OFFSET_ENABLE_I | MT8195_CORE_OFFSET_ENABLE_D; + writel(sec_ctrl, scp->cluster->reg_base + MT8195_SEC_CTRL); return 0; } @@ -567,11 +678,11 @@ static void *mt8192_scp_da_to_va(struct mtk_scp *scp, u64 da, size_t len) } /* optional memory region */ - if (scp->l1tcm_size && - da >= scp->l1tcm_phys && - (da + len) <= scp->l1tcm_phys + scp->l1tcm_size) { - offset = da - scp->l1tcm_phys; - return (void __force *)scp->l1tcm_base + offset; + if (scp->cluster->l1tcm_size && + da >= scp->cluster->l1tcm_phys && + (da + len) <= scp->cluster->l1tcm_phys + scp->cluster->l1tcm_size) { + offset = da - scp->cluster->l1tcm_phys; + return (void __force *)scp->cluster->l1tcm_base + offset; } /* optional memory region */ @@ -595,34 +706,62 @@ static void *scp_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iome static void mt8183_scp_stop(struct mtk_scp *scp) { /* Disable SCP watchdog */ - writel(0, scp->reg_base + MT8183_WDT_CFG); + writel(0, scp->cluster->reg_base + MT8183_WDT_CFG); } static void mt8192_scp_stop(struct mtk_scp *scp) { /* Disable SRAM clock */ -
scp_sram_power_off(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_0, 0); + scp_sram_power_off(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_1, 0); + scp_sram_power_off(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_2, 0); + scp_sram_power_off(scp->cluster->reg_base + MT8192_L1TCM_SRAM_PDN, 0); + scp_sram_power_off(scp->cluster->reg_base + MT8192_CPU0_SRAM_PD, 0); /* Disable SCP watchdog */ - writel(0, scp->reg_base + MT8192_CORE0_WDT_CFG); + writel(0, scp->cluster->reg_base + MT8192_CORE0_WDT_CFG); +} + +static void mt8195_scp_l2tcm_off(struct mtk_scp *scp) +{ + struct mtk_scp_of_cluster *scp_cluster = scp->cluster; + + mutex_lock(&scp_cluster->cluster_lock); + + if (scp_cluster->l2tcm_refcnt > 0) + scp_cluster->l2tcm_refcnt -= 1; + + if (scp_cluster->l2tcm_refcnt == 0) { + /* Power off L2TCM */ + scp_sram_power_off(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_0, 0); + scp_sram_power_off(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_1, 0); + scp_sram_power_off(scp->cluster->reg_base + MT8192_L2TCM_SRAM_PD_2, 0); + scp_sram_power_off(scp->cluster->reg_base + MT8192_L1TCM_SRAM_PDN, + MT8195_L1TCM_SRAM_PDN_RESERVED_RSI_BITS); + } + + mutex_unlock(&scp_cluster->cluster_lock); } static void mt8195_scp_stop(struct mtk_scp *scp) { - /* Disable SRAM clock */ - scp_sram_power_off(scp->reg_base + MT8192_L2TCM_SRAM_PD_0, 0); - scp_sram_power_off(scp->reg_base + MT8192_L2TCM_SRAM_PD_1, 0); - scp_sram_power_off(scp->reg_base + MT8192_L2TCM_SRAM_PD_2, 0); - scp_sram_power_off(scp->reg_base + MT8192_L1TCM_SRAM_PDN, - MT8195_L1TCM_SRAM_PDN_RESERVED_RSI_BITS); - scp_sram_power_off(scp->reg_base + MT8192_CPU0_SRAM_PD, 0); + mt8195_scp_l2tcm_off(scp); + + scp_sram_power_off(scp->cluster->reg_base + MT8192_CPU0_SRAM_PD, 0); + + /* Disable SCP watchdog */ + writel(0, scp->cluster->reg_base + MT8192_CORE0_WDT_CFG); +} + +static void mt8195_scp_c1_stop(struct mtk_scp *scp) +{ + mt8195_scp_l2tcm_off(scp); + + /* Power off CPU SRAM */ + scp_sram_power_off(scp->cluster->reg_base + MT8195_CPU1_SRAM_PD, 0); /* Disable SCP watchdog */ - writel(0, scp->reg_base + MT8192_CORE0_WDT_CFG); + writel(0, scp->cluster->reg_base + MT8195_CORE1_WDT_CFG); } static int scp_stop(struct rproc *rproc) @@ -811,7 +950,9 @@ static void scp_remove_rpmsg_subdev(struct mtk_scp *scp) } } -static int scp_probe(struct platform_device *pdev) +static struct mtk_scp *scp_rproc_init(struct platform_device *pdev, + struct mtk_scp_of_cluster *scp_cluster, + const struct mtk_scp_of_data *of_data) { struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; @@ -823,52 +964,38 @@ static int scp_probe(struct platform_device *pdev) ret = rproc_of_parse_firmware(dev, 0, &fw_name); if (ret < 0 && ret != -EINVAL) - return ret; + return ERR_PTR(ret); rproc = devm_rproc_alloc(dev, np->name, &scp_ops, fw_name, sizeof(*scp)); - if (!rproc) - return dev_err_probe(dev, -ENOMEM, "unable to allocate remoteproc\n"); + if (!rproc) { + dev_err(dev, "unable to allocate remoteproc\n"); + return ERR_PTR(-ENOMEM); + } scp = rproc->priv; scp->rproc = rproc; scp->dev = dev; - scp->data = of_device_get_match_data(dev); + scp->data = of_data; + scp->cluster = scp_cluster; platform_set_drvdata(pdev, scp); res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "sram"); scp->sram_base = devm_ioremap_resource(dev, res); - if (IS_ERR(scp->sram_base)) - return dev_err_probe(dev, PTR_ERR(scp->sram_base), - "Failed to parse and map sram memory\n"); + if (IS_ERR(scp->sram_base)) { + dev_err(dev, "Failed to parse and map sram memory\n"); + return 
ERR_CAST(scp->sram_base); + } scp->sram_size = resource_size(res); scp->sram_phys = res->start; - /* l1tcm is an optional memory region */ - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "l1tcm"); - scp->l1tcm_base = devm_ioremap_resource(dev, res); - if (IS_ERR(scp->l1tcm_base)) { - ret = PTR_ERR(scp->l1tcm_base); - if (ret != -EINVAL) { - return dev_err_probe(dev, ret, "Failed to map l1tcm memory\n"); - } - } else { - scp->l1tcm_size = resource_size(res); - scp->l1tcm_phys = res->start; - } - - scp->reg_base = devm_platform_ioremap_resource_byname(pdev, "cfg"); - if (IS_ERR(scp->reg_base)) - return dev_err_probe(dev, PTR_ERR(scp->reg_base), - "Failed to parse and map cfg memory\n"); - ret = scp->data->scp_clk_get(scp); if (ret) - return ret; + return ERR_PTR(ret); ret = scp_map_memory_region(scp); if (ret) - return ret; + return ERR_PTR(ret); mutex_init(&scp->send_lock); for (i = 0; i < SCP_IPI_MAX; i++) @@ -895,11 +1022,7 @@ static int scp_probe(struct platform_device *pdev) goto remove_subdev; } - ret = rproc_add(rproc); - if (ret) - goto remove_subdev; - - return 0; + return scp; remove_subdev: scp_remove_rpmsg_subdev(scp); @@ -910,15 +1033,13 @@ release_dev_mem: mutex_destroy(&scp->ipi_desc[i].lock); mutex_destroy(&scp->send_lock); - return ret; + return ERR_PTR(ret); } -static void scp_remove(struct platform_device *pdev) +static void scp_free(struct mtk_scp *scp) { - struct mtk_scp *scp = platform_get_drvdata(pdev); int i; - rproc_del(scp->rproc); scp_remove_rpmsg_subdev(scp); scp_ipi_unregister(scp, SCP_IPI_INIT); scp_unmap_memory_region(scp); @@ -927,6 +1048,186 @@ static void scp_remove(struct platform_device *pdev) mutex_destroy(&scp->send_lock); } +static int scp_add_single_core(struct platform_device *pdev, + struct mtk_scp_of_cluster *scp_cluster) +{ + struct device *dev = &pdev->dev; + struct list_head *scp_list = &scp_cluster->mtk_scp_list; + struct mtk_scp *scp; + int ret; + + scp = scp_rproc_init(pdev, scp_cluster, of_device_get_match_data(dev)); + if (IS_ERR(scp)) + return PTR_ERR(scp); + + ret = rproc_add(scp->rproc); + if (ret) { + dev_err(dev, "Failed to add rproc\n"); + scp_free(scp); + return ret; + } + + list_add_tail(&scp->elem, scp_list); + + return 0; +} + +static int scp_add_multi_core(struct platform_device *pdev, + struct mtk_scp_of_cluster *scp_cluster) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev_of_node(dev); + struct platform_device *cpdev; + struct device_node *child; + struct list_head *scp_list = &scp_cluster->mtk_scp_list; + const struct mtk_scp_of_data **cluster_of_data; + struct mtk_scp *scp, *temp; + int core_id = 0; + int ret; + + cluster_of_data = (const struct mtk_scp_of_data **)of_device_get_match_data(dev); + + for_each_available_child_of_node(np, child) { + if (!cluster_of_data[core_id]) { + ret = -EINVAL; + dev_err(dev, "Not support core %d\n", core_id); + of_node_put(child); + goto init_fail; + } + + cpdev = of_find_device_by_node(child); + if (!cpdev) { + ret = -ENODEV; + dev_err(dev, "Not found platform device for core %d\n", core_id); + of_node_put(child); + goto init_fail; + } + + scp = scp_rproc_init(cpdev, scp_cluster, cluster_of_data[core_id]); + put_device(&cpdev->dev); + if (IS_ERR(scp)) { + ret = PTR_ERR(scp); + dev_err(dev, "Failed to initialize core %d rproc\n", core_id); + of_node_put(child); + goto init_fail; + } + + ret = rproc_add(scp->rproc); + if (ret) { + dev_err(dev, "Failed to add rproc of core %d\n", core_id); + of_node_put(child); + scp_free(scp); + goto init_fail; + } + + 
list_add_tail(&scp->elem, scp_list); + core_id++; + } + + /* + * Here we are setting the platform device for @pdev to the last @scp that was + * created, which is needed because (1) scp_rproc_init() is calling + * platform_set_drvdata() on the child platform devices and (2) we need a handle to + * the cluster list in scp_remove(). + */ + platform_set_drvdata(pdev, scp); + + return 0; + +init_fail: + list_for_each_entry_safe_reverse(scp, temp, scp_list, elem) { + list_del(&scp->elem); + rproc_del(scp->rproc); + scp_free(scp); + } + + return ret; +} + +static bool scp_is_single_core(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev_of_node(dev); + struct device_node *child; + int num_cores = 0; + + for_each_child_of_node(np, child) + if (of_device_is_compatible(child, "mediatek,scp-core")) + num_cores++; + + return num_cores < 2; +} + +static int scp_cluster_init(struct platform_device *pdev, struct mtk_scp_of_cluster *scp_cluster) +{ + int ret; + + if (scp_is_single_core(pdev)) + ret = scp_add_single_core(pdev, scp_cluster); + else + ret = scp_add_multi_core(pdev, scp_cluster); + + return ret; +} + +static int scp_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct mtk_scp_of_cluster *scp_cluster; + struct resource *res; + int ret; + + scp_cluster = devm_kzalloc(dev, sizeof(*scp_cluster), GFP_KERNEL); + if (!scp_cluster) + return -ENOMEM; + + scp_cluster->reg_base = devm_platform_ioremap_resource_byname(pdev, "cfg"); + if (IS_ERR(scp_cluster->reg_base)) + return dev_err_probe(dev, PTR_ERR(scp_cluster->reg_base), + "Failed to parse and map cfg memory\n"); + + /* l1tcm is an optional memory region */ + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "l1tcm"); + scp_cluster->l1tcm_base = devm_ioremap_resource(dev, res); + if (IS_ERR(scp_cluster->l1tcm_base)) { + ret = PTR_ERR(scp_cluster->l1tcm_base); + if (ret != -EINVAL) + return dev_err_probe(dev, ret, "Failed to map l1tcm memory\n"); + + scp_cluster->l1tcm_base = NULL; + } else { + scp_cluster->l1tcm_size = resource_size(res); + scp_cluster->l1tcm_phys = res->start; + } + + INIT_LIST_HEAD(&scp_cluster->mtk_scp_list); + mutex_init(&scp_cluster->cluster_lock); + + ret = devm_of_platform_populate(dev); + if (ret) + return dev_err_probe(dev, ret, "Failed to populate platform devices\n"); + + ret = scp_cluster_init(pdev, scp_cluster); + if (ret) + return ret; + + return 0; +} + +static void scp_remove(struct platform_device *pdev) +{ + struct mtk_scp *scp = platform_get_drvdata(pdev); + struct mtk_scp_of_cluster *scp_cluster = scp->cluster; + struct mtk_scp *temp; + + list_for_each_entry_safe_reverse(scp, temp, &scp_cluster->mtk_scp_list, elem) { + list_del(&scp->elem); + rproc_del(scp->rproc); + scp_free(scp); + } + mutex_destroy(&scp_cluster->cluster_lock); +} + static const struct mtk_scp_of_data mt8183_of_data = { .scp_clk_get = mt8183_scp_clk_get, .scp_before_load = mt8183_scp_before_load, @@ -980,7 +1281,7 @@ static const struct mtk_scp_of_data mt8192_of_data = { static const struct mtk_scp_of_data mt8195_of_data = { .scp_clk_get = mt8195_scp_clk_get, .scp_before_load = mt8195_scp_before_load, - .scp_irq_handler = mt8192_scp_irq_handler, + .scp_irq_handler = mt8195_scp_irq_handler, .scp_reset_assert = mt8192_scp_reset_assert, .scp_reset_deassert = mt8192_scp_reset_deassert, .scp_stop = mt8195_scp_stop, @@ -989,12 +1290,31 @@ static const struct mtk_scp_of_data mt8195_of_data = { .host_to_scp_int_bit = MT8192_HOST_IPC_INT_BIT, }; +static const struct 
mtk_scp_of_data mt8195_of_data_c1 = { + .scp_clk_get = mt8195_scp_clk_get, + .scp_before_load = mt8195_scp_c1_before_load, + .scp_irq_handler = mt8195_scp_c1_irq_handler, + .scp_reset_assert = mt8195_scp_c1_reset_assert, + .scp_reset_deassert = mt8195_scp_c1_reset_deassert, + .scp_stop = mt8195_scp_c1_stop, + .scp_da_to_va = mt8192_scp_da_to_va, + .host_to_scp_reg = MT8192_GIPC_IN_SET, + .host_to_scp_int_bit = MT8195_CORE1_HOST_IPC_INT_BIT, +}; + +static const struct mtk_scp_of_data *mt8195_of_data_cores[] = { + &mt8195_of_data, + &mt8195_of_data_c1, + NULL +}; + static const struct of_device_id mtk_scp_of_match[] = { { .compatible = "mediatek,mt8183-scp", .data = &mt8183_of_data }, { .compatible = "mediatek,mt8186-scp", .data = &mt8186_of_data }, { .compatible = "mediatek,mt8188-scp", .data = &mt8188_of_data }, { .compatible = "mediatek,mt8192-scp", .data = &mt8192_of_data }, { .compatible = "mediatek,mt8195-scp", .data = &mt8195_of_data }, + { .compatible = "mediatek,mt8195-scp-dual", .data = &mt8195_of_data_cores }, {}, }; MODULE_DEVICE_TABLE(of, mtk_scp_of_match); diff --git a/drivers/remoteproc/mtk_scp_ipi.c b/drivers/remoteproc/mtk_scp_ipi.c index 9c7c17b9d181..cd0b60106ec2 100644 --- a/drivers/remoteproc/mtk_scp_ipi.c +++ b/drivers/remoteproc/mtk_scp_ipi.c @@ -177,7 +177,7 @@ int scp_ipi_send(struct mtk_scp *scp, u32 id, void *buf, unsigned int len, mutex_lock(&scp->send_lock); /* Wait until SCP receives the last command */ - ret = readl_poll_timeout_atomic(scp->reg_base + scp->data->host_to_scp_reg, + ret = readl_poll_timeout_atomic(scp->cluster->reg_base + scp->data->host_to_scp_reg, val, !val, 0, SCP_TIMEOUT_US); if (ret) { dev_err(scp->dev, "%s: IPI timeout!\n", __func__); @@ -192,7 +192,7 @@ int scp_ipi_send(struct mtk_scp *scp, u32 id, void *buf, unsigned int len, scp->ipi_id_ack[id] = false; /* send the command to SCP */ writel(scp->data->host_to_scp_int_bit, - scp->reg_base + scp->data->host_to_scp_reg); + scp->cluster->reg_base + scp->data->host_to_scp_reg); if (wait) { /* wait for SCP's ACK */ diff --git a/drivers/remoteproc/qcom_q6v5_mss.c b/drivers/remoteproc/qcom_q6v5_mss.c index 22fe7b5f5236..394b2c1cb5e2 100644 --- a/drivers/remoteproc/qcom_q6v5_mss.c +++ b/drivers/remoteproc/qcom_q6v5_mss.c @@ -2322,7 +2322,6 @@ static const struct rproc_hexagon_res msm8996_mss = { }, .proxy_clk_names = (char*[]){ "xo", - "pnoc", "qdss", NULL }, diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c index b5447dd2dd35..913a5d2068e8 100644 --- a/drivers/remoteproc/qcom_q6v5_pas.c +++ b/drivers/remoteproc/qcom_q6v5_pas.c @@ -813,6 +813,21 @@ static const struct adsp_data sm6350_adsp_resource = { .ssctl_id = 0x14, }; +static const struct adsp_data sm6375_mpss_resource = { + .crash_reason_smem = 421, + .firmware_name = "modem.mdt", + .pas_id = 4, + .minidump_id = 3, + .auto_boot = false, + .proxy_pd_names = (char*[]){ + "cx", + NULL + }, + .ssr_name = "mpss", + .sysmon_name = "modem", + .ssctl_id = 0x12, +}; + static const struct adsp_data sm8150_adsp_resource = { .crash_reason_smem = 423, .firmware_name = "adsp.mdt", @@ -1161,6 +1176,7 @@ static const struct of_device_id adsp_of_match[] = { { .compatible = "qcom,qcs404-adsp-pas", .data = &adsp_resource_init }, { .compatible = "qcom,qcs404-cdsp-pas", .data = &cdsp_resource_init }, { .compatible = "qcom,qcs404-wcss-pas", .data = &wcss_resource_init }, + { .compatible = "qcom,sc7180-adsp-pas", .data = &sm8250_adsp_resource}, { .compatible = "qcom,sc7180-mpss-pas", .data = &mpss_resource_init}, { .compatible = 
"qcom,sc7280-mpss-pas", .data = &mpss_resource_init}, { .compatible = "qcom,sc8180x-adsp-pas", .data = &sm8150_adsp_resource}, @@ -1180,6 +1196,9 @@ static const struct of_device_id adsp_of_match[] = { { .compatible = "qcom,sm6350-adsp-pas", .data = &sm6350_adsp_resource}, { .compatible = "qcom,sm6350-cdsp-pas", .data = &sm6350_cdsp_resource}, { .compatible = "qcom,sm6350-mpss-pas", .data = &mpss_resource_init}, + { .compatible = "qcom,sm6375-adsp-pas", .data = &sm6350_adsp_resource}, + { .compatible = "qcom,sm6375-cdsp-pas", .data = &sm8150_cdsp_resource}, + { .compatible = "qcom,sm6375-mpss-pas", .data = &sm6375_mpss_resource}, { .compatible = "qcom,sm8150-adsp-pas", .data = &sm8150_adsp_resource}, { .compatible = "qcom,sm8150-cdsp-pas", .data = &sm8150_cdsp_resource}, { .compatible = "qcom,sm8150-mpss-pas", .data = &mpss_resource_init}, diff --git a/drivers/remoteproc/st_remoteproc.c b/drivers/remoteproc/st_remoteproc.c index e3ce01d98b4c..cb163766c56d 100644 --- a/drivers/remoteproc/st_remoteproc.c +++ b/drivers/remoteproc/st_remoteproc.c @@ -16,10 +16,9 @@ #include <linux/mfd/syscon.h> #include <linux/module.h> #include <linux/of.h> -#include <linux/of_address.h> -#include <linux/of_device.h> #include <linux/of_reserved_mem.h> #include <linux/platform_device.h> +#include <linux/property.h> #include <linux/regmap.h> #include <linux/remoteproc.h> #include <linux/reset.h> @@ -341,7 +340,6 @@ static int st_rproc_parse_dt(struct platform_device *pdev) static int st_rproc_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - const struct of_device_id *match; struct st_rproc *ddata; struct device_node *np = dev->of_node; struct rproc *rproc; @@ -349,19 +347,17 @@ static int st_rproc_probe(struct platform_device *pdev) int enabled; int ret, i; - match = of_match_device(st_rproc_match, dev); - if (!match || !match->data) { - dev_err(dev, "No device match found\n"); - return -ENODEV; - } - rproc = rproc_alloc(dev, np->name, &st_rproc_ops, NULL, sizeof(*ddata)); if (!rproc) return -ENOMEM; rproc->has_iommu = false; ddata = rproc->priv; - ddata->config = (struct st_rproc_config *)match->data; + ddata->config = (struct st_rproc_config *)device_get_match_data(dev); + if (!ddata->config) { + ret = -ENODEV; + goto free_rproc; + } platform_set_drvdata(pdev, rproc); diff --git a/drivers/remoteproc/stm32_rproc.c b/drivers/remoteproc/stm32_rproc.c index 9d9b13530f78..4f469f0bcf8b 100644 --- a/drivers/remoteproc/stm32_rproc.c +++ b/drivers/remoteproc/stm32_rproc.c @@ -712,9 +712,9 @@ static int stm32_rproc_parse_dt(struct platform_device *pdev, unsigned int tzen; int err, irq; - irq = platform_get_irq(pdev, 0); + irq = platform_get_irq_optional(pdev, 0); if (irq == -EPROBE_DEFER) - return dev_err_probe(dev, irq, "failed to get interrupt\n"); + return irq; if (irq > 0) { err = devm_request_irq(dev, irq, stm32_rproc_wdg, 0, diff --git a/drivers/remoteproc/xlnx_r5_remoteproc.c b/drivers/remoteproc/xlnx_r5_remoteproc.c index feca6de68da2..4395edea9a64 100644 --- a/drivers/remoteproc/xlnx_r5_remoteproc.c +++ b/drivers/remoteproc/xlnx_r5_remoteproc.c @@ -39,12 +39,14 @@ enum zynqmp_r5_cluster_mode { * struct mem_bank_data - Memory Bank description * * @addr: Start address of memory bank + * @da: device address * @size: Size of Memory bank * @pm_domain_id: Power-domains id of memory bank for firmware to turn on/off * @bank_name: name of the bank for remoteproc framework */ struct mem_bank_data { phys_addr_t addr; + u32 da; size_t size; u32 pm_domain_id; char *bank_name; @@ -75,11 +77,19 @@ 
struct mbox_info { * Hardcoded TCM bank values. This will be removed once TCM bindings are * accepted for system-dt specifications and upstreamed in linux kernel */ -static const struct mem_bank_data zynqmp_tcm_banks[] = { - {0xffe00000UL, 0x10000UL, PD_R5_0_ATCM, "atcm0"}, /* TCM 64KB each */ - {0xffe20000UL, 0x10000UL, PD_R5_0_BTCM, "btcm0"}, - {0xffe90000UL, 0x10000UL, PD_R5_1_ATCM, "atcm1"}, - {0xffeb0000UL, 0x10000UL, PD_R5_1_BTCM, "btcm1"}, +static const struct mem_bank_data zynqmp_tcm_banks_split[] = { + {0xffe00000UL, 0x0, 0x10000UL, PD_R5_0_ATCM, "atcm0"}, /* TCM 64KB each */ + {0xffe20000UL, 0x20000, 0x10000UL, PD_R5_0_BTCM, "btcm0"}, + {0xffe90000UL, 0x0, 0x10000UL, PD_R5_1_ATCM, "atcm1"}, + {0xffeb0000UL, 0x20000, 0x10000UL, PD_R5_1_BTCM, "btcm1"}, +}; + +/* In lockstep mode the cluster combines the two 64KB TCMs into one 128KB TCM */ +static const struct mem_bank_data zynqmp_tcm_banks_lockstep[] = { + {0xffe00000UL, 0x0, 0x20000UL, PD_R5_0_ATCM, "atcm0"}, /* TCM 128KB each */ + {0xffe20000UL, 0x20000, 0x20000UL, PD_R5_0_BTCM, "btcm0"}, + {0, 0, 0, PD_R5_1_ATCM, ""}, + {0, 0, 0, PD_R5_1_BTCM, ""}, }; /** @@ -526,30 +536,6 @@ static int tcm_mem_map(struct rproc *rproc, /* clear TCMs */ memset_io(va, 0, mem->len); - /* - * The R5s expect their TCM banks to be at address 0x0 and 0x2000, - * while on the Linux side they are at 0xffexxxxx. - * - * Zero out the high 12 bits of the address. This will give - * expected values for TCM Banks 0A and 0B (0x0 and 0x20000). - */ - mem->da &= 0x000fffff; - - /* - * TCM Banks 1A and 1B still have to be translated. - * - * Below handle these two banks' absolute addresses (0xffe90000 and - * 0xffeb0000) and convert to the expected relative addresses - * (0x0 and 0x20000). - */ - if (mem->da == 0x90000 || mem->da == 0xB0000) - mem->da -= 0x90000; - - /* if translated TCM bank address is not valid report error */ - if (mem->da != 0x0 && mem->da != 0x20000) { - dev_err(&rproc->dev, "invalid TCM address: %x\n", mem->da); - return -EINVAL; - } return 0; } @@ -571,6 +557,7 @@ static int add_tcm_carveout_split_mode(struct rproc *rproc) u32 pm_domain_id; size_t bank_size; char *bank_name; + u32 da; r5_core = rproc->priv; dev = r5_core->dev; @@ -583,6 +570,7 @@ static int add_tcm_carveout_split_mode(struct rproc *rproc) */ for (i = 0; i < num_banks; i++) { bank_addr = r5_core->tcm_banks[i]->addr; + da = r5_core->tcm_banks[i]->da; bank_name = r5_core->tcm_banks[i]->bank_name; bank_size = r5_core->tcm_banks[i]->size; pm_domain_id = r5_core->tcm_banks[i]->pm_domain_id; @@ -595,11 +583,11 @@ static int add_tcm_carveout_split_mode(struct rproc *rproc) goto release_tcm_split; } - dev_dbg(dev, "TCM carveout split mode %s addr=%llx, size=0x%lx", - bank_name, bank_addr, bank_size); + dev_dbg(dev, "TCM carveout split mode %s addr=%llx, da=0x%x, size=0x%lx", + bank_name, bank_addr, da, bank_size); rproc_mem = rproc_mem_entry_init(dev, NULL, bank_addr, - bank_size, bank_addr, + bank_size, da, tcm_mem_map, tcm_mem_unmap, bank_name); if (!rproc_mem) { @@ -640,6 +628,7 @@ static int add_tcm_carveout_lockstep_mode(struct rproc *rproc) struct device *dev; u32 pm_domain_id; char *bank_name; + u32 da; r5_core = rproc->priv; dev = r5_core->dev; @@ -650,14 +639,11 @@ static int add_tcm_carveout_lockstep_mode(struct rproc *rproc) /* * In lockstep mode, TCM is contiguous memory block * However, each TCM block still needs to be enabled individually. - * So, Enable each TCM block individually, but add their size - * to create contiguous memory region. 
+ * So, enable each TCM block individually. + * Although ATCM and BTCM form a contiguous memory block, add two + * separate carveouts for them. */ - bank_addr = r5_core->tcm_banks[0]->addr; - bank_name = r5_core->tcm_banks[0]->bank_name; - for (i = 0; i < num_banks; i++) { - bank_size += r5_core->tcm_banks[i]->size; pm_domain_id = r5_core->tcm_banks[i]->pm_domain_id; /* Turn on each TCM bank individually */ @@ -668,23 +654,32 @@ dev_err(dev, "failed to turn on TCM 0x%x", pm_domain_id); goto release_tcm_lockstep; } - } - dev_dbg(dev, "TCM add carveout lockstep mode %s addr=0x%llx, size=0x%lx", - bank_name, bank_addr, bank_size); - - /* Register TCM address range, TCM map and unmap functions */ - rproc_mem = rproc_mem_entry_init(dev, NULL, bank_addr, - bank_size, bank_addr, - tcm_mem_map, tcm_mem_unmap, - bank_name); - if (!rproc_mem) { - ret = -ENOMEM; - goto release_tcm_lockstep; - } + bank_size = r5_core->tcm_banks[i]->size; + if (bank_size == 0) + continue; - /* If registration is success, add carveouts */ - rproc_add_carveout(rproc, rproc_mem); + bank_addr = r5_core->tcm_banks[i]->addr; + da = r5_core->tcm_banks[i]->da; + bank_name = r5_core->tcm_banks[i]->bank_name; + + /* Register TCM address range, TCM map and unmap functions */ + rproc_mem = rproc_mem_entry_init(dev, NULL, bank_addr, + bank_size, da, + tcm_mem_map, tcm_mem_unmap, + bank_name); + if (!rproc_mem) { + ret = -ENOMEM; + zynqmp_pm_release_node(pm_domain_id); + goto release_tcm_lockstep; + } + + /* If registration succeeds, add the carveout */ + rproc_add_carveout(rproc, rproc_mem); + + dev_dbg(dev, "TCM carveout lockstep mode %s addr=0x%llx, da=0x%x, size=0x%lx", + bank_name, bank_addr, da, bank_size); + } return 0; @@ -895,12 +890,19 @@ free_rproc: */ static int zynqmp_r5_get_tcm_node(struct zynqmp_r5_cluster *cluster) { + const struct mem_bank_data *zynqmp_tcm_banks; struct device *dev = cluster->dev; struct zynqmp_r5_core *r5_core; int tcm_bank_count, tcm_node; int i, j; - tcm_bank_count = ARRAY_SIZE(zynqmp_tcm_banks); + if (cluster->mode == SPLIT_MODE) { + zynqmp_tcm_banks = zynqmp_tcm_banks_split; + tcm_bank_count = ARRAY_SIZE(zynqmp_tcm_banks_split); + } else { + zynqmp_tcm_banks = zynqmp_tcm_banks_lockstep; + tcm_bank_count = ARRAY_SIZE(zynqmp_tcm_banks_lockstep); + } /* count per core tcm banks */ tcm_bank_count = tcm_bank_count / cluster->core_count; diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c index 32b550c91d9f..8abc7d022ff7 100644 --- a/drivers/rpmsg/rpmsg_core.c +++ b/drivers/rpmsg/rpmsg_core.c @@ -545,7 +545,7 @@ static int rpmsg_dev_probe(struct device *dev) goto out; if (rpdrv->callback) { - strncpy(chinfo.name, rpdev->id.name, RPMSG_NAME_SIZE); + strscpy(chinfo.name, rpdev->id.name, sizeof(chinfo.name)); chinfo.src = rpdev->src; chinfo.dst = RPMSG_ADDR_ANY; diff --git a/drivers/rpmsg/rpmsg_ns.c b/drivers/rpmsg/rpmsg_ns.c index c70ad03ff2e9..bde8c8d433e0 100644 --- a/drivers/rpmsg/rpmsg_ns.c +++ b/drivers/rpmsg/rpmsg_ns.c @@ -50,7 +50,7 @@ static int rpmsg_ns_cb(struct rpmsg_device *rpdev, void *data, int len, /* don't trust the remote processor for null terminating the name */ msg->name[RPMSG_NAME_SIZE - 1] = '\0'; - strncpy(chinfo.name, msg->name, sizeof(chinfo.name)); + strscpy_pad(chinfo.name, msg->name, sizeof(chinfo.name)); chinfo.src = RPMSG_ADDR_ANY; chinfo.dst = rpmsg32_to_cpu(rpdev, msg->addr);
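/*
 * Why the rpmsg copies differ: strscpy() guarantees a NUL-terminated
 * destination but leaves the bytes after the terminator untouched, while
 * strscpy_pad() also zero-fills that tail. Padding matters for buffers that
 * may be handed to the remote side as part of a whole structure (such as the
 * name-service messages in virtio_rpmsg_bus.c below), so stale kernel bytes
 * never leak over the transport; the purely in-kernel copy in
 * rpmsg_dev_probe() only needs strscpy(). A simplified userspace model of
 * the two helpers (stand-in names, assumes sz > 0; the real kernel versions
 * additionally return -E2BIG on truncation):
 */
#include <string.h>

static size_t scpy(char *dst, const char *src, size_t sz)
{
	size_t len = strnlen(src, sz - 1);

	memcpy(dst, src, len);
	dst[len] = '\0';			/* always terminated */
	return len;
}

static size_t scpy_pad(char *dst, const char *src, size_t sz)
{
	size_t len = scpy(dst, src, sz);

	memset(dst + len + 1, 0, sz - len - 1);	/* zero the tail too */
	return len;
}

diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 905ac7910c98..dc87965f8164 100644 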
--- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -329,7 +329,7 @@ static int virtio_rpmsg_announce_create(struct rpmsg_device *rpdev) virtio_has_feature(vrp->vdev, VIRTIO_RPMSG_F_NS)) { struct rpmsg_ns_msg nsm; - strncpy(nsm.name, rpdev->id.name, RPMSG_NAME_SIZE); + strscpy_pad(nsm.name, rpdev->id.name, sizeof(nsm.name)); nsm.addr = cpu_to_rpmsg32(rpdev, rpdev->ept->addr); nsm.flags = cpu_to_rpmsg32(rpdev, RPMSG_NS_CREATE); @@ -353,7 +353,7 @@ static int virtio_rpmsg_announce_destroy(struct rpmsg_device *rpdev) virtio_has_feature(vrp->vdev, VIRTIO_RPMSG_F_NS)) { struct rpmsg_ns_msg nsm; - strncpy(nsm.name, rpdev->id.name, RPMSG_NAME_SIZE); + strscpy_pad(nsm.name, rpdev->id.name, sizeof(nsm.name)); nsm.addr = cpu_to_rpmsg32(rpdev, rpdev->ept->addr); nsm.flags = cpu_to_rpmsg32(rpdev, RPMSG_NS_DESTROY); @@ -424,7 +424,7 @@ static struct rpmsg_device *__rpmsg_create_channel(struct virtproc_info *vrp, */ rpdev->announce = rpdev->src != RPMSG_ADDR_ANY; - strncpy(rpdev->id.name, chinfo->name, RPMSG_NAME_SIZE); + strscpy(rpdev->id.name, chinfo->name, sizeof(rpdev->id.name)); rpdev->dev.parent = &vrp->vdev->dev; rpdev->dev.release = virtio_rpmsg_release_device; diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index d09e08b71cfb..5dd33155d5d5 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -352,7 +352,7 @@ EXPORT_SYMBOL(ap_test_config_ctrl_domain); /* * ap_queue_info(): Check and get AP queue info. * Returns: 1 if APQN exists and info is filled, - * 0 if APQN seems to exit but there is no info + * 0 if APQN seems to exist but there is no info * available (eg. caused by an asynch pending error) * -1 invalid APQN, TAPQ error or AP queue status which * indicates there is no APQN. @@ -373,36 +373,33 @@ static int ap_queue_info(ap_qid_t qid, int *q_type, unsigned int *q_fac, /* call TAPQ on this APQN */ status = ap_test_queue(qid, ap_apft_available(), &tapq_info); - /* handle pending async error with return 'no info available' */ - if (status.async) - return 0; - switch (status.response_code) { case AP_RESPONSE_NORMAL: case AP_RESPONSE_RESET_IN_PROGRESS: case AP_RESPONSE_DECONFIGURED: case AP_RESPONSE_CHECKSTOPPED: case AP_RESPONSE_BUSY: - /* - * According to the architecture in all these cases the - * info should be filled. All bits 0 is not possible as - * there is at least one of the mode bits set. - */ - if (WARN_ON_ONCE(!tapq_info.value)) - return 0; - *q_type = tapq_info.at; - *q_fac = tapq_info.fac; - *q_depth = tapq_info.qd; - *q_ml = tapq_info.ml; - *q_decfg = status.response_code == AP_RESPONSE_DECONFIGURED; - *q_cstop = status.response_code == AP_RESPONSE_CHECKSTOPPED; - return 1; + /* For all these RCs the tapq info should be available */ + break; default: - /* - * A response code which indicates, there is no info available. 
- */ - return -1; + /* On a pending async error the info should be available */ + if (!status.async) + return -1; + break; } + + /* There should be at least one of the mode bits set */ + if (WARN_ON_ONCE(!tapq_info.value)) + return 0; + + *q_type = tapq_info.at; + *q_fac = tapq_info.fac; + *q_depth = tapq_info.qd; + *q_ml = tapq_info.ml; + *q_decfg = status.response_code == AP_RESPONSE_DECONFIGURED; + *q_cstop = status.response_code == AP_RESPONSE_CHECKSTOPPED; + + return 1; } void ap_wait(enum ap_sm_wait wait) @@ -1022,6 +1019,10 @@ EXPORT_SYMBOL(ap_driver_unregister); void ap_bus_force_rescan(void) { + /* Only trigger AP bus scans after the initial scan is done */ + if (atomic64_read(&ap_scan_bus_count) <= 0) + return; + /* processing a asynchronous bus rescan */ del_timer(&ap_config_timer); queue_work(system_long_wq, &ap_scan_work); diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 359a35f894d5..b0771ca0849b 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -206,7 +206,6 @@ struct ap_queue { bool config; /* configured state */ bool chkstop; /* checkstop state */ ap_qid_t qid; /* AP queue id. */ - bool interrupt; /* indicate if interrupts are enabled */ bool se_bound; /* SE bound state */ unsigned int assoc_idx; /* SE association index */ int queue_count; /* # messages currently on AP queue. */ diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 993240370ecf..3934a0cc13e7 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -200,13 +200,13 @@ static enum ap_sm_wait ap_sm_read(struct ap_queue *aq) return AP_SM_WAIT_AGAIN; } aq->sm_state = AP_SM_STATE_IDLE; - return AP_SM_WAIT_NONE; + break; case AP_RESPONSE_NO_PENDING_REPLY: if (aq->queue_count > 0) - return aq->interrupt ? + return status.irq_enabled ? AP_SM_WAIT_INTERRUPT : AP_SM_WAIT_HIGH_TIMEOUT; aq->sm_state = AP_SM_STATE_IDLE; - return AP_SM_WAIT_NONE; + break; default: aq->dev_state = AP_DEV_STATE_ERROR; aq->last_err_rc = status.response_code; @@ -215,6 +215,16 @@ static enum ap_sm_wait ap_sm_read(struct ap_queue *aq) AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); return AP_SM_WAIT_NONE; } + /* Check and maybe enable irq support (again) on this queue */ + if (!status.irq_enabled && status.queue_empty) { + void *lsi_ptr = ap_airq_ptr(); + + if (lsi_ptr && ap_queue_enable_irq(aq, lsi_ptr) == 0) { + aq->sm_state = AP_SM_STATE_SETIRQ_WAIT; + return AP_SM_WAIT_AGAIN; + } + } + return AP_SM_WAIT_NONE; } /** @@ -254,7 +264,7 @@ static enum ap_sm_wait ap_sm_write(struct ap_queue *aq) fallthrough; case AP_RESPONSE_Q_FULL: aq->sm_state = AP_SM_STATE_QUEUE_FULL; - return aq->interrupt ? + return status.irq_enabled ? AP_SM_WAIT_INTERRUPT : AP_SM_WAIT_HIGH_TIMEOUT; case AP_RESPONSE_RESET_IN_PROGRESS: aq->sm_state = AP_SM_STATE_RESET_WAIT; @@ -307,7 +317,6 @@ static enum ap_sm_wait ap_sm_reset(struct ap_queue *aq) case AP_RESPONSE_NORMAL: case AP_RESPONSE_RESET_IN_PROGRESS: aq->sm_state = AP_SM_STATE_RESET_WAIT; - aq->interrupt = false; aq->rapq_fbit = 0; aq->se_bound = false; return AP_SM_WAIT_LOW_TIMEOUT; @@ -383,7 +392,6 @@ static enum ap_sm_wait ap_sm_setirq_wait(struct ap_queue *aq) if (status.irq_enabled == 1) { /* Irqs are now enabled */ - aq->interrupt = true; aq->sm_state = (aq->queue_count > 0) ? 
AP_SM_STATE_WORKING : AP_SM_STATE_IDLE; } @@ -626,16 +634,21 @@ static ssize_t interrupt_show(struct device *dev, struct device_attribute *attr, char *buf) { struct ap_queue *aq = to_ap_queue(dev); + struct ap_queue_status status; int rc = 0; spin_lock_bh(&aq->lock); - if (aq->sm_state == AP_SM_STATE_SETIRQ_WAIT) + if (aq->sm_state == AP_SM_STATE_SETIRQ_WAIT) { rc = sysfs_emit(buf, "Enable Interrupt pending.\n"); - else if (aq->interrupt) - rc = sysfs_emit(buf, "Interrupts enabled.\n"); - else - rc = sysfs_emit(buf, "Interrupts disabled.\n"); + } else { + status = ap_tapq(aq->qid, NULL); + if (status.irq_enabled) + rc = sysfs_emit(buf, "Interrupts enabled.\n"); + else + rc = sysfs_emit(buf, "Interrupts disabled.\n"); + } spin_unlock_bh(&aq->lock); + return rc; } @@ -1032,7 +1045,6 @@ struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type) if (ap_sb_available() && is_prot_virt_guest()) aq->ap_dev.device.groups = ap_queue_dev_sb_attr_groups; aq->qid = qid; - aq->interrupt = false; spin_lock_init(&aq->lock); INIT_LIST_HEAD(&aq->pendingq); INIT_LIST_HEAD(&aq->requestq); diff --git a/drivers/s390/crypto/zcrypt_card.c b/drivers/s390/crypto/zcrypt_card.c index c815722d0ac8..050462d95222 100644 --- a/drivers/s390/crypto/zcrypt_card.c +++ b/drivers/s390/crypto/zcrypt_card.c @@ -52,7 +52,7 @@ static ssize_t online_show(struct device *dev, { struct zcrypt_card *zc = dev_get_drvdata(dev); struct ap_card *ac = to_ap_card(dev); - int online = ac->config && zc->online ? 1 : 0; + int online = ac->config && !ac->chkstop && zc->online ? 1 : 0; return sysfs_emit(buf, "%d\n", online); } @@ -70,7 +70,7 @@ static ssize_t online_store(struct device *dev, if (sscanf(buf, "%d\n", &online) != 1 || online < 0 || online > 1) return -EINVAL; - if (online && !ac->config) + if (online && (!ac->config || ac->chkstop)) return -ENODEV; zc->online = online; diff --git a/drivers/s390/crypto/zcrypt_queue.c b/drivers/s390/crypto/zcrypt_queue.c index 112a80e8e6c2..67d8e0ae0eec 100644 --- a/drivers/s390/crypto/zcrypt_queue.c +++ b/drivers/s390/crypto/zcrypt_queue.c @@ -42,7 +42,7 @@ static ssize_t online_show(struct device *dev, { struct zcrypt_queue *zq = dev_get_drvdata(dev); struct ap_queue *aq = to_ap_queue(dev); - int online = aq->config && zq->online ? 1 : 0; + int online = aq->config && !aq->chkstop && zq->online ? 
1 : 0; return sysfs_emit(buf, "%d\n", online); } @@ -59,7 +59,8 @@ static ssize_t online_store(struct device *dev, if (sscanf(buf, "%d\n", &online) != 1 || online < 0 || online > 1) return -EINVAL; - if (online && (!aq->config || !aq->card->config)) + if (online && (!aq->config || !aq->card->config || + aq->chkstop || aq->card->chkstop)) return -ENODEV; if (online && !zc->online) return -EINVAL; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 6af2511e070c..cf8506d0f185 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -3675,7 +3675,7 @@ static void qeth_flush_queue(struct qeth_qdio_out_q *queue) static void qeth_check_outbound_queue(struct qeth_qdio_out_q *queue) { /* - * check if weed have to switch to non-packing mode or if + * check if we have to switch to non-packing mode or if * we have to get a pci flag out on the queue */ if ((atomic_read(&queue->used_buffers) <= QETH_LOW_WATERMARK_PACK) || diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index 35dbfacecf1c..70c9dd6b6a31 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -1169,6 +1169,7 @@ config SPI_XTENSA_XTFPGA config SPI_ZYNQ_QSPI tristate "Xilinx Zynq QSPI controller" depends on ARCH_ZYNQ || COMPILE_TEST + depends on SPI_MEM help This enables support for the Zynq Quad SPI controller in master mode. diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 791df0e69105..8ead7acb99f3 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -3317,33 +3317,52 @@ void spi_unregister_controller(struct spi_controller *ctlr) } EXPORT_SYMBOL_GPL(spi_unregister_controller); +static inline int __spi_check_suspended(const struct spi_controller *ctlr) +{ + return ctlr->flags & SPI_CONTROLLER_SUSPENDED ? 
-ESHUTDOWN : 0; +} + +static inline void __spi_mark_suspended(struct spi_controller *ctlr) +{ + mutex_lock(&ctlr->bus_lock_mutex); + ctlr->flags |= SPI_CONTROLLER_SUSPENDED; + mutex_unlock(&ctlr->bus_lock_mutex); +} + +static inline void __spi_mark_resumed(struct spi_controller *ctlr) +{ + mutex_lock(&ctlr->bus_lock_mutex); + ctlr->flags &= ~SPI_CONTROLLER_SUSPENDED; + mutex_unlock(&ctlr->bus_lock_mutex); +} + int spi_controller_suspend(struct spi_controller *ctlr) { - int ret; + int ret = 0; /* Basically no-ops for non-queued controllers */ - if (!ctlr->queued) - return 0; - - ret = spi_stop_queue(ctlr); - if (ret) - dev_err(&ctlr->dev, "queue stop failed\n"); + if (ctlr->queued) { + ret = spi_stop_queue(ctlr); + if (ret) + dev_err(&ctlr->dev, "queue stop failed\n"); + } + __spi_mark_suspended(ctlr); return ret; } EXPORT_SYMBOL_GPL(spi_controller_suspend); int spi_controller_resume(struct spi_controller *ctlr) { - int ret; - - if (!ctlr->queued) - return 0; + int ret = 0; - ret = spi_start_queue(ctlr); - if (ret) - dev_err(&ctlr->dev, "queue restart failed\n"); + __spi_mark_resumed(ctlr); + if (ctlr->queued) { + ret = spi_start_queue(ctlr); + if (ret) + dev_err(&ctlr->dev, "queue restart failed\n"); + } return ret; } EXPORT_SYMBOL_GPL(spi_controller_resume); @@ -4147,8 +4166,7 @@ static void __spi_transfer_message_noqueue(struct spi_controller *ctlr, struct s ctlr->cur_msg = msg; ret = __spi_pump_transfer_message(ctlr, msg, was_busy); if (ret) - goto out; - + dev_err(&ctlr->dev, "noqueue transfer failed\n"); ctlr->cur_msg = NULL; ctlr->fallback = false; @@ -4164,7 +4182,6 @@ static void __spi_transfer_message_noqueue(struct spi_controller *ctlr, struct s spi_idle_runtime_pm(ctlr); } -out: mutex_unlock(&ctlr->io_mutex); } @@ -4187,6 +4204,11 @@ static int __spi_sync(struct spi_device *spi, struct spi_message *message) int status; struct spi_controller *ctlr = spi->controller; + if (__spi_check_suspended(ctlr)) { + dev_warn_once(&spi->dev, "Attempted to sync while suspended\n"); + return -ESHUTDOWN; + } + status = __spi_validate(spi, message); if (status != 0) return status; diff --git a/drivers/staging/greybus/pwm.c b/drivers/staging/greybus/pwm.c index 57cc1960d059..a3cb68cfa0f9 100644 --- a/drivers/staging/greybus/pwm.c +++ b/drivers/staging/greybus/pwm.c @@ -258,7 +258,6 @@ static const struct pwm_ops gb_pwm_ops = { .request = gb_pwm_request, .free = gb_pwm_free, .apply = gb_pwm_apply, - .owner = THIS_MODULE, }; static int gb_pwm_probe(struct gbphy_device *gbphy_dev, diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 925ee1d61afb..8ca061d3bbb9 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -170,13 +170,6 @@ static const struct serial8250_config uart_config[] = { .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .flags = UART_CAP_FIFO, }, - [PORT_AR7] = { - .name = "AR7", - .fifo_size = 16, - .tx_loadsz = 16, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00, - .flags = UART_CAP_FIFO /* | UART_CAP_AFE */, - }, [PORT_U6_16550A] = { .name = "U6_16550A", .fifo_size = 64,
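/*
 * The spi.c hunks above close a race between system suspend and late
 * submitters: suspend publishes SPI_CONTROLLER_SUSPENDED under
 * bus_lock_mutex, and __spi_sync() now fails fast with -ESHUTDOWN instead
 * of touching a quiesced queue. A compact userspace model of that
 * flag-under-lock pattern (pthreads standing in for the kernel mutex, all
 * names hypothetical):
 */
#include <errno.h>
#include <pthread.h>

struct bus_ctl {
	pthread_mutex_t lock;		/* models ctlr->bus_lock_mutex */
	unsigned int flags;
};
#define BUS_SUSPENDED 0x1

static void bus_set_suspended(struct bus_ctl *b, int on)
{
	pthread_mutex_lock(&b->lock);	/* serialize against transfers */
	if (on)
		b->flags |= BUS_SUSPENDED;
	else
		b->flags &= ~BUS_SUSPENDED;
	pthread_mutex_unlock(&b->lock);
}

static int bus_sync_transfer(struct bus_ctl *b)
{
	if (b->flags & BUS_SUSPENDED)	/* reject late callers */
		return -ESHUTDOWN;
	/* ... perform the transfer ... */
	return 0;
}

diff --git a/drivers/video/fbdev/amifb.c b/drivers/video/fbdev/amifb.c index b18c6b4f129a..305f396c764c 100644 --- a/drivers/video/fbdev/amifb.c +++ b/drivers/video/fbdev/amifb.c @@ -3752,7 +3752,7 @@ release: } -static int __exit amifb_remove(struct platform_device *pdev) +static void __exit amifb_remove(struct platform_device *pdev) { struct fb_info *info = platform_get_drvdata(pdev); @@ -3765,11 +3765,16 @@ static int __exit 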
amifb_remove(struct platform_device *pdev) chipfree(); framebuffer_release(info); amifb_video_off(); - return 0; } -static struct platform_driver amifb_driver = { - .remove = __exit_p(amifb_remove), +/* + * amifb_remove() lives in .exit.text. For drivers registered via + * module_platform_driver_probe() this is ok because they cannot get unbound + * at runtime. The driver needs to be marked with __refdata, otherwise modpost + * triggers a section mismatch warning. + */ +static struct platform_driver amifb_driver __refdata = { + .remove_new = __exit_p(amifb_remove), .driver = { .name = "amiga-video", }, diff --git a/drivers/video/fbdev/atmel_lcdfb.c b/drivers/video/fbdev/atmel_lcdfb.c index a908db233409..9e391e5eaf9d 100644 --- a/drivers/video/fbdev/atmel_lcdfb.c +++ b/drivers/video/fbdev/atmel_lcdfb.c @@ -220,7 +220,7 @@ static inline void atmel_lcdfb_power_control(struct atmel_lcdfb_info *sinfo, int } } -static const struct fb_fix_screeninfo atmel_lcdfb_fix __initconst = { +static const struct fb_fix_screeninfo atmel_lcdfb_fix = { .type = FB_TYPE_PACKED_PIXELS, .visual = FB_VISUAL_TRUECOLOR, .xpanstep = 0, @@ -841,7 +841,7 @@ static void atmel_lcdfb_task(struct work_struct *work) atmel_lcdfb_reset(sinfo); } -static int __init atmel_lcdfb_init_fbinfo(struct atmel_lcdfb_info *sinfo) +static int atmel_lcdfb_init_fbinfo(struct atmel_lcdfb_info *sinfo) { struct fb_info *info = sinfo->info; int ret = 0; @@ -1017,7 +1017,7 @@ put_display_node: return ret; } -static int __init atmel_lcdfb_probe(struct platform_device *pdev) +static int atmel_lcdfb_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct fb_info *info; @@ -1223,14 +1223,14 @@ out: return ret; } -static int __exit atmel_lcdfb_remove(struct platform_device *pdev) +static void atmel_lcdfb_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct fb_info *info = dev_get_drvdata(dev); struct atmel_lcdfb_info *sinfo; if (!info || !info->par) - return 0; + return; sinfo = info->par; cancel_work_sync(&sinfo->task); @@ -1252,8 +1252,6 @@ static int __exit atmel_lcdfb_remove(struct platform_device *pdev) } framebuffer_release(info); - - return 0; } #ifdef CONFIG_PM @@ -1301,7 +1299,8 @@ static int atmel_lcdfb_resume(struct platform_device *pdev) #endif static struct platform_driver atmel_lcdfb_driver = { - .remove = __exit_p(atmel_lcdfb_remove), + .probe = atmel_lcdfb_probe, + .remove_new = atmel_lcdfb_remove, .suspend = atmel_lcdfb_suspend, .resume = atmel_lcdfb_resume, .driver = { @@ -1309,8 +1308,7 @@ static struct platform_driver atmel_lcdfb_driver = { .of_match_table = atmel_lcdfb_dt_ids, }, }; - -module_platform_driver_probe(atmel_lcdfb_driver, atmel_lcdfb_probe); +module_platform_driver(atmel_lcdfb_driver); MODULE_DESCRIPTION("AT91 LCD Controller framebuffer driver") ; MODULE_AUTHOR("Nicolas Ferre <nicolas.ferre@atmel.com>");
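/*
 * Context for the two fbdev conversions above: with
 * module_platform_driver_probe() a driver binds exactly once at registration
 * time, so its callbacks may stay in the .init/.exit sections (the amifb
 * case, which then needs __refdata to silence modpost). Once a driver moves
 * to plain module_platform_driver(), probe can run at any time, e.g. on
 * deferred probe or sysfs bind, so atmel_lcdfb has to drop its __init,
 * __initconst and __exit annotations. A skeleton of the probe-once pattern
 * (driver name hypothetical):
 */
#include <linux/module.h>
#include <linux/platform_device.h>

static int __init foo_probe(struct platform_device *pdev)
{
	return 0;	/* bound once, at registration time */
}

static void __exit foo_remove(struct platform_device *pdev)
{
	/* only reachable on module unload, never via sysfs unbind */
}

/* __refdata: the reference into .exit.text is intentional */
static struct platform_driver foo_driver __refdata = {
	.remove_new = __exit_p(foo_remove),
	.driver = { .name = "foo" },
};
module_platform_driver_probe(foo_driver, foo_probe);

diff --git a/drivers/video/fbdev/fsl-diu-fb.c b/drivers/video/fbdev/fsl-diu-fb.c index 7fbd9f069ac2..0bced82fa494 100644 --- a/drivers/video/fbdev/fsl-diu-fb.c +++ b/drivers/video/fbdev/fsl-diu-fb.c @@ -490,7 +490,7 @@ static enum fsl_diu_monitor_port fsl_diu_name_to_port(const char *s) /* * Workaround for failed writing desc register of planes. * Needed with MPC5121 DIU rev 2.0 silicon. 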
*/ -void wr_reg_wa(u32 *reg, u32 val) +static void wr_reg_wa(u32 *reg, u32 val) { do { out_be32(reg, val); diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index bf59daf862fc..a80939fe2ee6 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -1013,6 +1013,8 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) } else if (IS_ENABLED(CONFIG_SYSFB)) { base = screen_info.lfb_base; size = screen_info.lfb_size; + } else { + goto err1; } /* diff --git a/drivers/video/fbdev/imsttfb.c b/drivers/video/fbdev/imsttfb.c index e7e03e920729..660499260f46 100644 --- a/drivers/video/fbdev/imsttfb.c +++ b/drivers/video/fbdev/imsttfb.c @@ -1421,7 +1421,6 @@ static int init_imstt(struct fb_info *info) if ((info->var.xres * info->var.yres) * (info->var.bits_per_pixel >> 3) > info->fix.smem_len || !(compute_imstt_regvals(par, info->var.xres, info->var.yres))) { printk("imsttfb: %ux%ux%u not supported\n", info->var.xres, info->var.yres, info->var.bits_per_pixel); - framebuffer_release(info); return -ENODEV; } @@ -1453,14 +1452,11 @@ static int init_imstt(struct fb_info *info) FBINFO_HWACCEL_FILLRECT | FBINFO_HWACCEL_YPAN; - if (fb_alloc_cmap(&info->cmap, 0, 0)) { - framebuffer_release(info); + if (fb_alloc_cmap(&info->cmap, 0, 0)) return -ENODEV; - } if (register_framebuffer(info) < 0) { fb_dealloc_cmap(&info->cmap); - framebuffer_release(info); return -ENODEV; } @@ -1500,8 +1496,8 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!request_mem_region(addr, size, "imsttfb")) { printk(KERN_ERR "imsttfb: Can't reserve memory region\n"); - framebuffer_release(info); - return -ENODEV; + ret = -ENODEV; + goto release_info; } switch (pdev->device) { @@ -1518,36 +1514,39 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) printk(KERN_INFO "imsttfb: Device 0x%x unknown, " "contact maintainer.\n", pdev->device); ret = -ENODEV; - goto error; + goto release_mem_region; } info->fix.smem_start = addr; info->screen_base = (__u8 *)ioremap(addr, par->ramdac == IBM ? 
0x400000 : 0x800000); if (!info->screen_base) - goto error; + goto release_mem_region; info->fix.mmio_start = addr + 0x800000; par->dc_regs = ioremap(addr + 0x800000, 0x1000); if (!par->dc_regs) - goto error; + goto unmap_screen_base; par->cmap_regs_phys = addr + 0x840000; par->cmap_regs = (__u8 *)ioremap(addr + 0x840000, 0x1000); if (!par->cmap_regs) - goto error; + goto unmap_dc_regs; info->pseudo_palette = par->palette; ret = init_imstt(info); if (ret) - goto error; + goto unmap_cmap_regs; pci_set_drvdata(pdev, info); - return ret; + return 0; -error: - if (par->dc_regs) - iounmap(par->dc_regs); - if (info->screen_base) - iounmap(info->screen_base); +unmap_cmap_regs: + iounmap(par->cmap_regs); +unmap_dc_regs: + iounmap(par->dc_regs); +unmap_screen_base: + iounmap(info->screen_base); +release_mem_region: release_mem_region(addr, size); +release_info: framebuffer_release(info); return ret; } diff --git a/drivers/video/fbdev/offb.c b/drivers/video/fbdev/offb.c index dcb1b81d35db..b421b46d88ef 100644 --- a/drivers/video/fbdev/offb.c +++ b/drivers/video/fbdev/offb.c @@ -423,11 +423,9 @@ static void offb_init_fb(struct platform_device *parent, const char *name, fix = &info->fix; var = &info->var; - if (name) { - strcpy(fix->id, "OFfb "); - strncat(fix->id, name, sizeof(fix->id) - sizeof("OFfb ")); - fix->id[sizeof(fix->id) - 1] = '\0'; - } else + if (name) + snprintf(fix->id, sizeof(fix->id), "OFfb %s", name); + else snprintf(fix->id, sizeof(fix->id), "OFfb %pOFn", dp); diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c index 42c96f1cfc93..694cf6318782 100644 --- a/drivers/video/fbdev/omap/omapfb_main.c +++ b/drivers/video/fbdev/omap/omapfb_main.c @@ -1643,17 +1643,16 @@ static int omapfb_do_probe(struct platform_device *pdev, r = -ENOMEM; goto cleanup; } - fbdev->int_irq = platform_get_irq(pdev, 0); - if (fbdev->int_irq < 0) { - r = -ENXIO; + + r = platform_get_irq(pdev, 0); + if (r < 0) goto cleanup; - } + fbdev->int_irq = r; - fbdev->ext_irq = platform_get_irq(pdev, 1); - if (fbdev->ext_irq < 0) { - r = -ENXIO; + r = platform_get_irq(pdev, 1); + if (r < 0) goto cleanup; - } + fbdev->ext_irq = r; init_state++; @@ -1857,20 +1856,13 @@ static int __init omapfb_setup(char *options) if (!strncmp(this_opt, "accel", 5)) def_accel = 1; else if (!strncmp(this_opt, "vram:", 5)) { + unsigned long long vram; char *suffix; - unsigned long vram; - vram = (simple_strtoul(this_opt + 5, &suffix, 0)); + + vram = memparse(this_opt + 5, &suffix); switch (suffix[0]) { case '\0': break; - case 'm': - case 'M': - vram *= 1024; - fallthrough; - case 'k': - case 'K': - vram *= 1024; - break; default: pr_debug("omapfb: invalid vram suffix %c\n", suffix[0]); diff --git a/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c b/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c index 0daaf9f89bab..c6786726a1af 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c @@ -221,7 +221,7 @@ err_reg: return r; } -static int __exit tvc_remove(struct platform_device *pdev) +static void tvc_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -233,8 +233,6 @@ static int __exit tvc_remove(struct platform_device *pdev) tvc_disconnect(dssdev); omap_dss_put_device(in); - - return 0; } static const struct of_device_id tvc_of_match[] = { @@ -247,11 +245,10 @@ MODULE_DEVICE_TABLE(of, 
tvc_of_match); static struct platform_driver tvc_connector_driver = { .probe = tvc_probe, - .remove = __exit_p(tvc_remove), + .remove_new = tvc_remove, .driver = { .name = "connector-analog-tv", .of_match_table = tvc_of_match, - .suppress_bind_attrs = true, }, }; diff --git a/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c b/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c index c8ad3ef42bd3..0cc9294f89b4 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c @@ -303,7 +303,7 @@ err_reg: return r; } -static int __exit dvic_remove(struct platform_device *pdev) +static void dvic_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -317,8 +317,6 @@ static int __exit dvic_remove(struct platform_device *pdev) omap_dss_put_device(in); i2c_put_adapter(ddata->i2c_adapter); - - return 0; } static const struct of_device_id dvic_of_match[] = { @@ -330,11 +328,10 @@ MODULE_DEVICE_TABLE(of, dvic_of_match); static struct platform_driver dvi_connector_driver = { .probe = dvic_probe, - .remove = __exit_p(dvic_remove), + .remove_new = dvic_remove, .driver = { .name = "connector-dvi", .of_match_table = dvic_of_match, - .suppress_bind_attrs = true, }, }; diff --git a/drivers/video/fbdev/omap2/omapfb/displays/connector-hdmi.c b/drivers/video/fbdev/omap2/omapfb/displays/connector-hdmi.c index 8f9ff9fb4ca4..b862a32670ae 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/connector-hdmi.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/connector-hdmi.c @@ -249,7 +249,7 @@ err_reg: return r; } -static int __exit hdmic_remove(struct platform_device *pdev) +static void hdmic_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -261,8 +261,6 @@ static int __exit hdmic_remove(struct platform_device *pdev) hdmic_disconnect(dssdev); omap_dss_put_device(in); - - return 0; } static const struct of_device_id hdmic_of_match[] = { @@ -274,11 +272,10 @@ MODULE_DEVICE_TABLE(of, hdmic_of_match); static struct platform_driver hdmi_connector_driver = { .probe = hdmic_probe, - .remove = __exit_p(hdmic_remove), + .remove_new = hdmic_remove, .driver = { .name = "connector-hdmi", .of_match_table = hdmic_of_match, - .suppress_bind_attrs = true, }, }; diff --git a/drivers/video/fbdev/omap2/omapfb/displays/encoder-opa362.c b/drivers/video/fbdev/omap2/omapfb/displays/encoder-opa362.c index dd29dc5c77ec..f0d3eb581166 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/encoder-opa362.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/encoder-opa362.c @@ -231,7 +231,7 @@ err_reg: return r; } -static int __exit opa362_remove(struct platform_device *pdev) +static void opa362_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -248,8 +248,6 @@ static int __exit opa362_remove(struct platform_device *pdev) opa362_disconnect(dssdev, dssdev->dst); omap_dss_put_device(in); - - return 0; } static const struct of_device_id opa362_of_match[] = { @@ -260,11 +258,10 @@ MODULE_DEVICE_TABLE(of, opa362_of_match); static struct platform_driver opa362_driver = { .probe = opa362_probe, - .remove = __exit_p(opa362_remove), + .remove_new = opa362_remove, .driver = { .name = "amplifier-opa362", .of_match_table = opa362_of_match, - .suppress_bind_attrs = true, }, }; diff 
--git a/drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410.c b/drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410.c index 7bac420169a6..c8aca4592949 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410.c @@ -217,7 +217,7 @@ err_reg: return r; } -static int __exit tfp410_remove(struct platform_device *pdev) +static void tfp410_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -234,8 +234,6 @@ static int __exit tfp410_remove(struct platform_device *pdev) tfp410_disconnect(dssdev, dssdev->dst); omap_dss_put_device(in); - - return 0; } static const struct of_device_id tfp410_of_match[] = { @@ -247,11 +245,10 @@ MODULE_DEVICE_TABLE(of, tfp410_of_match); static struct platform_driver tfp410_driver = { .probe = tfp410_probe, - .remove = __exit_p(tfp410_remove), + .remove_new = tfp410_remove, .driver = { .name = "tfp410", .of_match_table = tfp410_of_match, - .suppress_bind_attrs = true, }, }; diff --git a/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c b/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c index 67f0c9250e9e..eb3926d0361b 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c @@ -283,7 +283,7 @@ err_gpio: return r; } -static int __exit tpd_remove(struct platform_device *pdev) +static void tpd_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -300,8 +300,6 @@ static int __exit tpd_remove(struct platform_device *pdev) tpd_disconnect(dssdev, dssdev->dst); omap_dss_put_device(in); - - return 0; } static const struct of_device_id tpd_of_match[] = { @@ -313,11 +311,10 @@ MODULE_DEVICE_TABLE(of, tpd_of_match); static struct platform_driver tpd_driver = { .probe = tpd_probe, - .remove = __exit_p(tpd_remove), + .remove_new = tpd_remove, .driver = { .name = "tpd12s015", .of_match_table = tpd_of_match, - .suppress_bind_attrs = true, }, }; diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dpi.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dpi.c index 9790053c5877..937f9091274f 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dpi.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-dpi.c @@ -211,7 +211,7 @@ err_reg: return r; } -static int __exit panel_dpi_remove(struct platform_device *pdev) +static void panel_dpi_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -223,8 +223,6 @@ static int __exit panel_dpi_remove(struct platform_device *pdev) panel_dpi_disconnect(dssdev); omap_dss_put_device(in); - - return 0; } static const struct of_device_id panel_dpi_of_match[] = { @@ -236,11 +234,10 @@ MODULE_DEVICE_TABLE(of, panel_dpi_of_match); static struct platform_driver panel_dpi_driver = { .probe = panel_dpi_probe, - .remove = __exit_p(panel_dpi_remove), + .remove_new = panel_dpi_remove, .driver = { .name = "panel-dpi", .of_match_table = panel_dpi_of_match, - .suppress_bind_attrs = true, }, }; diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c index 77fce1223a64..adb8881bac28 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c +++ 
b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c @@ -1241,7 +1241,7 @@ err_reg: return r; } -static int __exit dsicm_remove(struct platform_device *pdev) +static void dsicm_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -1269,8 +1269,6 @@ static int __exit dsicm_remove(struct platform_device *pdev) /* reset, to be sure that the panel is in a valid state */ dsicm_hw_reset(ddata); - - return 0; } static const struct of_device_id dsicm_of_match[] = { @@ -1282,11 +1280,10 @@ MODULE_DEVICE_TABLE(of, dsicm_of_match); static struct platform_driver dsicm_driver = { .probe = dsicm_probe, - .remove = __exit_p(dsicm_remove), + .remove_new = dsicm_remove, .driver = { .name = "panel-dsi-cm", .of_match_table = dsicm_of_match, - .suppress_bind_attrs = true, }, }; diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c index cc30758300e2..e37268cf8dca 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c @@ -292,7 +292,7 @@ err_reg: return r; } -static int __exit sharp_ls_remove(struct platform_device *pdev) +static void sharp_ls_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -304,8 +304,6 @@ static int __exit sharp_ls_remove(struct platform_device *pdev) sharp_ls_disconnect(dssdev); omap_dss_put_device(in); - - return 0; } static const struct of_device_id sharp_ls_of_match[] = { @@ -317,11 +315,10 @@ MODULE_DEVICE_TABLE(of, sharp_ls_of_match); static struct platform_driver sharp_ls_driver = { .probe = sharp_ls_probe, - .remove = __exit_p(sharp_ls_remove), + .remove_new = sharp_ls_remove, .driver = { .name = "panel-sharp-ls037v7dw01", .of_match_table = sharp_ls_of_match, - .suppress_bind_attrs = true, }, }; diff --git a/drivers/video/fbdev/omap2/omapfb/vrfb.c b/drivers/video/fbdev/omap2/omapfb/vrfb.c index ee0dd4c6a646..568e6e1eca62 100644 --- a/drivers/video/fbdev/omap2/omapfb/vrfb.c +++ b/drivers/video/fbdev/omap2/omapfb/vrfb.c @@ -368,17 +368,10 @@ static int __init vrfb_probe(struct platform_device *pdev) return 0; } -static void __exit vrfb_remove(struct platform_device *pdev) -{ - vrfb_loaded = false; -} - static struct platform_driver vrfb_driver = { .driver.name = "omapvrfb", - .remove = __exit_p(vrfb_remove), }; - -module_platform_driver_probe(vrfb_driver, vrfb_probe); +builtin_platform_driver_probe(vrfb_driver, vrfb_probe); MODULE_AUTHOR("Tomi Valkeinen <tomi.valkeinen@ti.com>"); MODULE_DESCRIPTION("OMAP VRFB"); diff --git a/drivers/video/fbdev/via/viafbdev.c b/drivers/video/fbdev/via/viafbdev.c index 58868f8880d6..a52b1ba43a48 100644 --- a/drivers/video/fbdev/via/viafbdev.c +++ b/drivers/video/fbdev/via/viafbdev.c @@ -574,7 +574,7 @@ static int viafb_ioctl(struct fb_info *info, u_int cmd, u_long arg) break; case VIAFB_SET_GAMMA_LUT: - viafb_gamma_table = memdup_user(argp, 256 * sizeof(u32)); + viafb_gamma_table = memdup_array_user(argp, 256, sizeof(u32)); if (IS_ERR(viafb_gamma_table)) return PTR_ERR(viafb_gamma_table); viafb_set_gamma_table(viafb_bpp, viafb_gamma_table); diff --git a/drivers/vlynq/Kconfig b/drivers/vlynq/Kconfig deleted file mode 100644 index e7f9492a0b04..000000000000 --- a/drivers/vlynq/Kconfig +++ /dev/null @@ -1,21 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -menu 
"TI VLYNQ" - depends on AR7 - -config VLYNQ - bool "TI VLYNQ bus support" - help - Support for Texas Instruments(R) VLYNQ bus. - The VLYNQ bus is a high-speed, serial and packetized - data bus which allows external peripherals of a SoC - to appear into the system's main memory. - - If unsure, say N - -config VLYNQ_DEBUG - bool "VLYNQ bus debug" - depends on VLYNQ && DEBUG_KERNEL - help - Turn on VLYNQ bus debugging. - -endmenu diff --git a/drivers/vlynq/Makefile b/drivers/vlynq/Makefile deleted file mode 100644 index d9ce5b2b5ce0..000000000000 --- a/drivers/vlynq/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Makefile for kernel vlynq drivers -# - -obj-$(CONFIG_VLYNQ) += vlynq.o diff --git a/drivers/vlynq/vlynq.c b/drivers/vlynq/vlynq.c deleted file mode 100644 index 4af6615808cc..000000000000 --- a/drivers/vlynq/vlynq.c +++ /dev/null @@ -1,799 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2006, 2007 Eugene Konev <ejka@openwrt.org> - * - * Parts of the VLYNQ specification can be found here: - * http://www.ti.com/litv/pdf/sprue36a - */ - -#include <linux/init.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/device.h> -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/platform_device.h> -#include <linux/interrupt.h> -#include <linux/delay.h> -#include <linux/io.h> -#include <linux/slab.h> -#include <linux/irq.h> - -#include <linux/vlynq.h> - -#define VLYNQ_CTRL_PM_ENABLE 0x80000000 -#define VLYNQ_CTRL_CLOCK_INT 0x00008000 -#define VLYNQ_CTRL_CLOCK_DIV(x) (((x) & 7) << 16) -#define VLYNQ_CTRL_INT_LOCAL 0x00004000 -#define VLYNQ_CTRL_INT_ENABLE 0x00002000 -#define VLYNQ_CTRL_INT_VECTOR(x) (((x) & 0x1f) << 8) -#define VLYNQ_CTRL_INT2CFG 0x00000080 -#define VLYNQ_CTRL_RESET 0x00000001 - -#define VLYNQ_CTRL_CLOCK_MASK (0x7 << 16) - -#define VLYNQ_INT_OFFSET 0x00000014 -#define VLYNQ_REMOTE_OFFSET 0x00000080 - -#define VLYNQ_STATUS_LINK 0x00000001 -#define VLYNQ_STATUS_LERROR 0x00000080 -#define VLYNQ_STATUS_RERROR 0x00000100 - -#define VINT_ENABLE 0x00000100 -#define VINT_TYPE_EDGE 0x00000080 -#define VINT_LEVEL_LOW 0x00000040 -#define VINT_VECTOR(x) ((x) & 0x1f) -#define VINT_OFFSET(irq) (8 * ((irq) % 4)) - -#define VLYNQ_AUTONEGO_V2 0x00010000 - -struct vlynq_regs { - u32 revision; - u32 control; - u32 status; - u32 int_prio; - u32 int_status; - u32 int_pending; - u32 int_ptr; - u32 tx_offset; - struct vlynq_mapping rx_mapping[4]; - u32 chip; - u32 autonego; - u32 unused[6]; - u32 int_device[8]; -}; - -#ifdef CONFIG_VLYNQ_DEBUG -static void vlynq_dump_regs(struct vlynq_device *dev) -{ - int i; - - printk(KERN_DEBUG "VLYNQ local=%p remote=%p\n", - dev->local, dev->remote); - for (i = 0; i < 32; i++) { - printk(KERN_DEBUG "VLYNQ: local %d: %08x\n", - i + 1, ((u32 *)dev->local)[i]); - printk(KERN_DEBUG "VLYNQ: remote %d: %08x\n", - i + 1, ((u32 *)dev->remote)[i]); - } -} - -static void vlynq_dump_mem(u32 *base, int count) -{ - int i; - - for (i = 0; i < (count + 3) / 4; i++) { - if (i % 4 == 0) - printk(KERN_DEBUG "\nMEM[0x%04x]:", i * 4); - printk(KERN_DEBUG " 0x%08x", *(base + i)); - } - printk(KERN_DEBUG "\n"); -} -#endif - -/* Check the VLYNQ link status with a given device */ -static int vlynq_linked(struct vlynq_device *dev) -{ - int i; - - for (i = 0; i < 100; i++) - if (readl(&dev->local->status) & VLYNQ_STATUS_LINK) - return 1; - else - cpu_relax(); - - return 0; -} - -static void vlynq_reset(struct vlynq_device *dev) -{ - 
writel(readl(&dev->local->control) | VLYNQ_CTRL_RESET, - &dev->local->control); - - /* Wait for the devices to finish resetting */ - msleep(5); - - /* Remove reset bit */ - writel(readl(&dev->local->control) & ~VLYNQ_CTRL_RESET, - &dev->local->control); - - /* Give some time for the devices to settle */ - msleep(5); -} - -static void vlynq_irq_unmask(struct irq_data *d) -{ - struct vlynq_device *dev = irq_data_get_irq_chip_data(d); - int virq; - u32 val; - - BUG_ON(!dev); - virq = d->irq - dev->irq_start; - val = readl(&dev->remote->int_device[virq >> 2]); - val |= (VINT_ENABLE | virq) << VINT_OFFSET(virq); - writel(val, &dev->remote->int_device[virq >> 2]); -} - -static void vlynq_irq_mask(struct irq_data *d) -{ - struct vlynq_device *dev = irq_data_get_irq_chip_data(d); - int virq; - u32 val; - - BUG_ON(!dev); - virq = d->irq - dev->irq_start; - val = readl(&dev->remote->int_device[virq >> 2]); - val &= ~(VINT_ENABLE << VINT_OFFSET(virq)); - writel(val, &dev->remote->int_device[virq >> 2]); -} - -static int vlynq_irq_type(struct irq_data *d, unsigned int flow_type) -{ - struct vlynq_device *dev = irq_data_get_irq_chip_data(d); - int virq; - u32 val; - - BUG_ON(!dev); - virq = d->irq - dev->irq_start; - val = readl(&dev->remote->int_device[virq >> 2]); - switch (flow_type & IRQ_TYPE_SENSE_MASK) { - case IRQ_TYPE_EDGE_RISING: - case IRQ_TYPE_EDGE_FALLING: - case IRQ_TYPE_EDGE_BOTH: - val |= VINT_TYPE_EDGE << VINT_OFFSET(virq); - val &= ~(VINT_LEVEL_LOW << VINT_OFFSET(virq)); - break; - case IRQ_TYPE_LEVEL_HIGH: - val &= ~(VINT_TYPE_EDGE << VINT_OFFSET(virq)); - val &= ~(VINT_LEVEL_LOW << VINT_OFFSET(virq)); - break; - case IRQ_TYPE_LEVEL_LOW: - val &= ~(VINT_TYPE_EDGE << VINT_OFFSET(virq)); - val |= VINT_LEVEL_LOW << VINT_OFFSET(virq); - break; - default: - return -EINVAL; - } - writel(val, &dev->remote->int_device[virq >> 2]); - return 0; -} - -static void vlynq_local_ack(struct irq_data *d) -{ - struct vlynq_device *dev = irq_data_get_irq_chip_data(d); - u32 status = readl(&dev->local->status); - - pr_debug("%s: local status: 0x%08x\n", - dev_name(&dev->dev), status); - writel(status, &dev->local->status); -} - -static void vlynq_remote_ack(struct irq_data *d) -{ - struct vlynq_device *dev = irq_data_get_irq_chip_data(d); - u32 status = readl(&dev->remote->status); - - pr_debug("%s: remote status: 0x%08x\n", - dev_name(&dev->dev), status); - writel(status, &dev->remote->status); -} - -static irqreturn_t vlynq_irq(int irq, void *dev_id) -{ - struct vlynq_device *dev = dev_id; - u32 status; - int virq = 0; - - status = readl(&dev->local->int_status); - writel(status, &dev->local->int_status); - - if (unlikely(!status)) - spurious_interrupt(); - - while (status) { - if (status & 1) - do_IRQ(dev->irq_start + virq); - status >>= 1; - virq++; - } - - return IRQ_HANDLED; -} - -static struct irq_chip vlynq_irq_chip = { - .name = "vlynq", - .irq_unmask = vlynq_irq_unmask, - .irq_mask = vlynq_irq_mask, - .irq_set_type = vlynq_irq_type, -}; - -static struct irq_chip vlynq_local_chip = { - .name = "vlynq local error", - .irq_unmask = vlynq_irq_unmask, - .irq_mask = vlynq_irq_mask, - .irq_ack = vlynq_local_ack, -}; - -static struct irq_chip vlynq_remote_chip = { - .name = "vlynq local error", - .irq_unmask = vlynq_irq_unmask, - .irq_mask = vlynq_irq_mask, - .irq_ack = vlynq_remote_ack, -}; - -static int vlynq_setup_irq(struct vlynq_device *dev) -{ - u32 val; - int i, virq; - - if (dev->local_irq == dev->remote_irq) { - printk(KERN_ERR - "%s: local vlynq irq should be different from remote\n", - 
dev_name(&dev->dev)); - return -EINVAL; - } - - /* Clear local and remote error bits */ - writel(readl(&dev->local->status), &dev->local->status); - writel(readl(&dev->remote->status), &dev->remote->status); - - /* Now setup interrupts */ - val = VLYNQ_CTRL_INT_VECTOR(dev->local_irq); - val |= VLYNQ_CTRL_INT_ENABLE | VLYNQ_CTRL_INT_LOCAL | - VLYNQ_CTRL_INT2CFG; - val |= readl(&dev->local->control); - writel(VLYNQ_INT_OFFSET, &dev->local->int_ptr); - writel(val, &dev->local->control); - - val = VLYNQ_CTRL_INT_VECTOR(dev->remote_irq); - val |= VLYNQ_CTRL_INT_ENABLE; - val |= readl(&dev->remote->control); - writel(VLYNQ_INT_OFFSET, &dev->remote->int_ptr); - writel(val, &dev->remote->int_ptr); - writel(val, &dev->remote->control); - - for (i = dev->irq_start; i <= dev->irq_end; i++) { - virq = i - dev->irq_start; - if (virq == dev->local_irq) { - irq_set_chip_and_handler(i, &vlynq_local_chip, - handle_level_irq); - irq_set_chip_data(i, dev); - } else if (virq == dev->remote_irq) { - irq_set_chip_and_handler(i, &vlynq_remote_chip, - handle_level_irq); - irq_set_chip_data(i, dev); - } else { - irq_set_chip_and_handler(i, &vlynq_irq_chip, - handle_simple_irq); - irq_set_chip_data(i, dev); - writel(0, &dev->remote->int_device[virq >> 2]); - } - } - - if (request_irq(dev->irq, vlynq_irq, IRQF_SHARED, "vlynq", dev)) { - printk(KERN_ERR "%s: request_irq failed\n", - dev_name(&dev->dev)); - return -EAGAIN; - } - - return 0; -} - -static void vlynq_device_release(struct device *dev) -{ - struct vlynq_device *vdev = to_vlynq_device(dev); - kfree(vdev); -} - -static int vlynq_device_match(struct device *dev, - struct device_driver *drv) -{ - struct vlynq_device *vdev = to_vlynq_device(dev); - struct vlynq_driver *vdrv = to_vlynq_driver(drv); - struct vlynq_device_id *ids = vdrv->id_table; - - while (ids->id) { - if (ids->id == vdev->dev_id) { - vdev->divisor = ids->divisor; - vlynq_set_drvdata(vdev, ids); - printk(KERN_INFO "Driver found for VLYNQ " - "device: %08x\n", vdev->dev_id); - return 1; - } - printk(KERN_DEBUG "Not using the %08x VLYNQ device's driver" - " for VLYNQ device: %08x\n", ids->id, vdev->dev_id); - ids++; - } - return 0; -} - -static int vlynq_device_probe(struct device *dev) -{ - struct vlynq_device *vdev = to_vlynq_device(dev); - struct vlynq_driver *drv = to_vlynq_driver(dev->driver); - struct vlynq_device_id *id = vlynq_get_drvdata(vdev); - int result = -ENODEV; - - if (drv->probe) - result = drv->probe(vdev, id); - if (result) - put_device(dev); - return result; -} - -static void vlynq_device_remove(struct device *dev) -{ - struct vlynq_driver *drv = to_vlynq_driver(dev->driver); - - if (drv->remove) - drv->remove(to_vlynq_device(dev)); -} - -int __vlynq_register_driver(struct vlynq_driver *driver, struct module *owner) -{ - driver->driver.name = driver->name; - driver->driver.bus = &vlynq_bus_type; - return driver_register(&driver->driver); -} -EXPORT_SYMBOL(__vlynq_register_driver); - -void vlynq_unregister_driver(struct vlynq_driver *driver) -{ - driver_unregister(&driver->driver); -} -EXPORT_SYMBOL(vlynq_unregister_driver); - -/* - * A VLYNQ remote device can clock the VLYNQ bus master - * using a dedicated clock line. In that case, both the - * remove device and the bus master should have the same - * serial clock dividers configured. Iterate through the - * 8 possible dividers until we actually link with the - * device. - */ -static int __vlynq_try_remote(struct vlynq_device *dev) -{ - int i; - - vlynq_reset(dev); - for (i = dev->dev_id ? 
vlynq_rdiv2 : vlynq_rdiv8; dev->dev_id ? - i <= vlynq_rdiv8 : i >= vlynq_rdiv2; - dev->dev_id ? i++ : i--) { - - if (!vlynq_linked(dev)) - break; - - writel((readl(&dev->remote->control) & - ~VLYNQ_CTRL_CLOCK_MASK) | - VLYNQ_CTRL_CLOCK_INT | - VLYNQ_CTRL_CLOCK_DIV(i - vlynq_rdiv1), - &dev->remote->control); - writel((readl(&dev->local->control) - & ~(VLYNQ_CTRL_CLOCK_INT | - VLYNQ_CTRL_CLOCK_MASK)) | - VLYNQ_CTRL_CLOCK_DIV(i - vlynq_rdiv1), - &dev->local->control); - - if (vlynq_linked(dev)) { - printk(KERN_DEBUG - "%s: using remote clock divisor %d\n", - dev_name(&dev->dev), i - vlynq_rdiv1 + 1); - dev->divisor = i; - return 0; - } else { - vlynq_reset(dev); - } - } - - return -ENODEV; -} - -/* - * A VLYNQ remote device can be clocked by the VLYNQ bus - * master using a dedicated clock line. In that case, only - * the bus master configures the serial clock divider. - * Iterate through the 8 possible dividers until we - * actually get a link with the device. - */ -static int __vlynq_try_local(struct vlynq_device *dev) -{ - int i; - - vlynq_reset(dev); - - for (i = dev->dev_id ? vlynq_ldiv2 : vlynq_ldiv8; dev->dev_id ? - i <= vlynq_ldiv8 : i >= vlynq_ldiv2; - dev->dev_id ? i++ : i--) { - - writel((readl(&dev->local->control) & - ~VLYNQ_CTRL_CLOCK_MASK) | - VLYNQ_CTRL_CLOCK_INT | - VLYNQ_CTRL_CLOCK_DIV(i - vlynq_ldiv1), - &dev->local->control); - - if (vlynq_linked(dev)) { - printk(KERN_DEBUG - "%s: using local clock divisor %d\n", - dev_name(&dev->dev), i - vlynq_ldiv1 + 1); - dev->divisor = i; - return 0; - } else { - vlynq_reset(dev); - } - } - - return -ENODEV; -} - -/* - * When using external clocking method, serial clock - * is supplied by an external oscillator, therefore we - * should mask the local clock bit in the clock control - * register for both the bus master and the remote device. - */ -static int __vlynq_try_external(struct vlynq_device *dev) -{ - vlynq_reset(dev); - if (!vlynq_linked(dev)) - return -ENODEV; - - writel((readl(&dev->remote->control) & - ~VLYNQ_CTRL_CLOCK_INT), - &dev->remote->control); - - writel((readl(&dev->local->control) & - ~VLYNQ_CTRL_CLOCK_INT), - &dev->local->control); - - if (vlynq_linked(dev)) { - printk(KERN_DEBUG "%s: using external clock\n", - dev_name(&dev->dev)); - dev->divisor = vlynq_div_external; - return 0; - } - - return -ENODEV; -} - -static int __vlynq_enable_device(struct vlynq_device *dev) -{ - int result; - struct plat_vlynq_ops *ops = dev->dev.platform_data; - - result = ops->on(dev); - if (result) - return result; - - switch (dev->divisor) { - case vlynq_div_external: - case vlynq_div_auto: - /* When the device is brought from reset it should have clock - * generation negotiated by hardware. 
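- * (The remote end advertises that it is sourcing the clock by setting - * VLYNQ_CTRL_CLOCK_INT in its control register; in that case the remote - * dividers are probed first, otherwise they are probed last.)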
- * Check which device is generating clocks and perform setup - * accordingly */ - if (vlynq_linked(dev) && readl(&dev->remote->control) & - VLYNQ_CTRL_CLOCK_INT) { - if (!__vlynq_try_remote(dev) || - !__vlynq_try_local(dev) || - !__vlynq_try_external(dev)) - return 0; - } else { - if (!__vlynq_try_external(dev) || - !__vlynq_try_local(dev) || - !__vlynq_try_remote(dev)) - return 0; - } - break; - case vlynq_ldiv1: - case vlynq_ldiv2: - case vlynq_ldiv3: - case vlynq_ldiv4: - case vlynq_ldiv5: - case vlynq_ldiv6: - case vlynq_ldiv7: - case vlynq_ldiv8: - writel(VLYNQ_CTRL_CLOCK_INT | - VLYNQ_CTRL_CLOCK_DIV(dev->divisor - - vlynq_ldiv1), &dev->local->control); - writel(0, &dev->remote->control); - if (vlynq_linked(dev)) { - printk(KERN_DEBUG - "%s: using local clock divisor %d\n", - dev_name(&dev->dev), - dev->divisor - vlynq_ldiv1 + 1); - return 0; - } - break; - case vlynq_rdiv1: - case vlynq_rdiv2: - case vlynq_rdiv3: - case vlynq_rdiv4: - case vlynq_rdiv5: - case vlynq_rdiv6: - case vlynq_rdiv7: - case vlynq_rdiv8: - writel(0, &dev->local->control); - writel(VLYNQ_CTRL_CLOCK_INT | - VLYNQ_CTRL_CLOCK_DIV(dev->divisor - - vlynq_rdiv1), &dev->remote->control); - if (vlynq_linked(dev)) { - printk(KERN_DEBUG - "%s: using remote clock divisor %d\n", - dev_name(&dev->dev), - dev->divisor - vlynq_rdiv1 + 1); - return 0; - } - break; - } - - ops->off(dev); - return -ENODEV; -} - -int vlynq_enable_device(struct vlynq_device *dev) -{ - struct plat_vlynq_ops *ops = dev->dev.platform_data; - int result = -ENODEV; - - result = __vlynq_enable_device(dev); - if (result) - return result; - - result = vlynq_setup_irq(dev); - if (result) - ops->off(dev); - - dev->enabled = !result; - return result; -} -EXPORT_SYMBOL(vlynq_enable_device); - - -void vlynq_disable_device(struct vlynq_device *dev) -{ - struct plat_vlynq_ops *ops = dev->dev.platform_data; - - dev->enabled = 0; - free_irq(dev->irq, dev); - ops->off(dev); -} -EXPORT_SYMBOL(vlynq_disable_device); - -int vlynq_set_local_mapping(struct vlynq_device *dev, u32 tx_offset, - struct vlynq_mapping *mapping) -{ - int i; - - if (!dev->enabled) - return -ENXIO; - - writel(tx_offset, &dev->local->tx_offset); - for (i = 0; i < 4; i++) { - writel(mapping[i].offset, &dev->local->rx_mapping[i].offset); - writel(mapping[i].size, &dev->local->rx_mapping[i].size); - } - return 0; -} -EXPORT_SYMBOL(vlynq_set_local_mapping); - -int vlynq_set_remote_mapping(struct vlynq_device *dev, u32 tx_offset, - struct vlynq_mapping *mapping) -{ - int i; - - if (!dev->enabled) - return -ENXIO; - - writel(tx_offset, &dev->remote->tx_offset); - for (i = 0; i < 4; i++) { - writel(mapping[i].offset, &dev->remote->rx_mapping[i].offset); - writel(mapping[i].size, &dev->remote->rx_mapping[i].size); - } - return 0; -} -EXPORT_SYMBOL(vlynq_set_remote_mapping); - -int vlynq_set_local_irq(struct vlynq_device *dev, int virq) -{ - int irq = dev->irq_start + virq; - if (dev->enabled) - return -EBUSY; - - if ((irq < dev->irq_start) || (irq > dev->irq_end)) - return -EINVAL; - - if (virq == dev->remote_irq) - return -EINVAL; - - dev->local_irq = virq; - - return 0; -} -EXPORT_SYMBOL(vlynq_set_local_irq); - -int vlynq_set_remote_irq(struct vlynq_device *dev, int virq) -{ - int irq = dev->irq_start + virq; - if (dev->enabled) - return -EBUSY; - - if ((irq < dev->irq_start) || (irq > dev->irq_end)) - return -EINVAL; - - if (virq == dev->local_irq) - return -EINVAL; - - dev->remote_irq = virq; - - return 0; -} -EXPORT_SYMBOL(vlynq_set_remote_irq); - -static int vlynq_probe(struct platform_device 
*pdev) -{ - struct vlynq_device *dev; - struct resource *regs_res, *mem_res, *irq_res; - int len, result; - - regs_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs"); - if (!regs_res) - return -ENODEV; - - mem_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "mem"); - if (!mem_res) - return -ENODEV; - - irq_res = platform_get_resource_byname(pdev, IORESOURCE_IRQ, "devirq"); - if (!irq_res) - return -ENODEV; - - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) { - printk(KERN_ERR - "vlynq: failed to allocate device structure\n"); - return -ENOMEM; - } - - dev->id = pdev->id; - dev->dev.bus = &vlynq_bus_type; - dev->dev.parent = &pdev->dev; - dev_set_name(&dev->dev, "vlynq%d", dev->id); - dev->dev.platform_data = pdev->dev.platform_data; - dev->dev.release = vlynq_device_release; - - dev->regs_start = regs_res->start; - dev->regs_end = regs_res->end; - dev->mem_start = mem_res->start; - dev->mem_end = mem_res->end; - - len = resource_size(regs_res); - if (!request_mem_region(regs_res->start, len, dev_name(&dev->dev))) { - printk(KERN_ERR "%s: Can't request vlynq registers\n", - dev_name(&dev->dev)); - result = -ENXIO; - goto fail_request; - } - - dev->local = ioremap(regs_res->start, len); - if (!dev->local) { - printk(KERN_ERR "%s: Can't remap vlynq registers\n", - dev_name(&dev->dev)); - result = -ENXIO; - goto fail_remap; - } - - dev->remote = (struct vlynq_regs *)((void *)dev->local + - VLYNQ_REMOTE_OFFSET); - - dev->irq = platform_get_irq_byname(pdev, "irq"); - dev->irq_start = irq_res->start; - dev->irq_end = irq_res->end; - dev->local_irq = dev->irq_end - dev->irq_start; - dev->remote_irq = dev->local_irq - 1; - - if (device_register(&dev->dev)) - goto fail_register; - platform_set_drvdata(pdev, dev); - - printk(KERN_INFO "%s: regs 0x%p, irq %d, mem 0x%p\n", - dev_name(&dev->dev), (void *)dev->regs_start, dev->irq, - (void *)dev->mem_start); - - dev->dev_id = 0; - dev->divisor = vlynq_div_auto; - result = __vlynq_enable_device(dev); - if (result == 0) { - dev->dev_id = readl(&dev->remote->chip); - ((struct plat_vlynq_ops *)(dev->dev.platform_data))->off(dev); - } - if (dev->dev_id) - printk(KERN_INFO "Found a VLYNQ device: %08x\n", dev->dev_id); - - return 0; - -fail_register: - iounmap(dev->local); -fail_remap: -fail_request: - release_mem_region(regs_res->start, len); - kfree(dev); - return result; -} - -static int vlynq_remove(struct platform_device *pdev) -{ - struct vlynq_device *dev = platform_get_drvdata(pdev); - - device_unregister(&dev->dev); - iounmap(dev->local); - release_mem_region(dev->regs_start, - dev->regs_end - dev->regs_start + 1); - - kfree(dev); - - return 0; -} - -static struct platform_driver vlynq_platform_driver = { - .driver.name = "vlynq", - .probe = vlynq_probe, - .remove = vlynq_remove, -}; - -struct bus_type vlynq_bus_type = { - .name = "vlynq", - .match = vlynq_device_match, - .probe = vlynq_device_probe, - .remove = vlynq_device_remove, -}; -EXPORT_SYMBOL(vlynq_bus_type); - -static int vlynq_init(void) -{ - int res = 0; - - res = bus_register(&vlynq_bus_type); - if (res) - goto fail_bus; - - res = platform_driver_register(&vlynq_platform_driver); - if (res) - goto fail_platform; - - return 0; - -fail_platform: - bus_unregister(&vlynq_bus_type); -fail_bus: - return res; -} - -static void vlynq_exit(void) -{ - platform_driver_unregister(&vlynq_platform_driver); - bus_unregister(&vlynq_bus_type); -} - -module_init(vlynq_init); -module_exit(vlynq_exit); diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 
8cb6fa45d599..7d22051b15a2 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1768,12 +1768,6 @@ config SIBYTE_WDOG To compile this driver as a loadable module, choose M here. The module will be called sb_wdog. -config AR7_WDT - tristate "TI AR7 Watchdog Timer" - depends on AR7 || (MIPS && 32BIT && COMPILE_TEST) - help - Hardware driver for the TI AR7 Watchdog Timer. - config TXX9_WDT tristate "Toshiba TXx9 Watchdog Timer" depends on CPU_TX49XX || (MIPS && COMPILE_TEST) diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index 7eab9de311cb..7cbc34514ec1 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -168,7 +168,6 @@ obj-$(CONFIG_INDYDOG) += indydog.o obj-$(CONFIG_JZ4740_WDT) += jz4740_wdt.o obj-$(CONFIG_WDT_MTX1) += mtx-1_wdt.o obj-$(CONFIG_SIBYTE_WDOG) += sb_wdog.o -obj-$(CONFIG_AR7_WDT) += ar7_wdt.o obj-$(CONFIG_TXX9_WDT) += txx9wdt.o obj-$(CONFIG_OCTEON_WDT) += octeon-wdt.o octeon-wdt-y := octeon-wdt-main.o octeon-wdt-nmi.o diff --git a/drivers/watchdog/apple_wdt.c b/drivers/watchdog/apple_wdt.c index eddeb0fede89..d4f739932f0b 100644 --- a/drivers/watchdog/apple_wdt.c +++ b/drivers/watchdog/apple_wdt.c @@ -173,6 +173,8 @@ static int apple_wdt_probe(struct platform_device *pdev) if (!wdt->clk_rate) return -EINVAL; + platform_set_drvdata(pdev, wdt); + wdt->wdd.ops = &apple_wdt_ops; wdt->wdd.info = &apple_wdt_info; wdt->wdd.max_timeout = U32_MAX / wdt->clk_rate; @@ -190,6 +192,28 @@ static int apple_wdt_probe(struct platform_device *pdev) return devm_watchdog_register_device(dev, &wdt->wdd); } +static int apple_wdt_resume(struct device *dev) +{ + struct apple_wdt *wdt = dev_get_drvdata(dev); + + if (watchdog_active(&wdt->wdd) || watchdog_hw_running(&wdt->wdd)) + apple_wdt_start(&wdt->wdd); + + return 0; +} + +static int apple_wdt_suspend(struct device *dev) +{ + struct apple_wdt *wdt = dev_get_drvdata(dev); + + if (watchdog_active(&wdt->wdd) || watchdog_hw_running(&wdt->wdd)) + apple_wdt_stop(&wdt->wdd); + + return 0; +} + +static DEFINE_SIMPLE_DEV_PM_OPS(apple_wdt_pm_ops, apple_wdt_suspend, apple_wdt_resume); + static const struct of_device_id apple_wdt_of_match[] = { { .compatible = "apple,wdt" }, {}, @@ -200,6 +224,7 @@ static struct platform_driver apple_wdt_driver = { .driver = { .name = "apple-watchdog", .of_match_table = apple_wdt_of_match, + .pm = pm_sleep_ptr(&apple_wdt_pm_ops), }, .probe = apple_wdt_probe, }; diff --git a/drivers/watchdog/ar7_wdt.c b/drivers/watchdog/ar7_wdt.c deleted file mode 100644 index cdcaeb0961ac..000000000000 --- a/drivers/watchdog/ar7_wdt.c +++ /dev/null @@ -1,315 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * drivers/watchdog/ar7_wdt.c - * - * Copyright (C) 2007 Nicolas Thill <nico@openwrt.org> - * Copyright (c) 2005 Enrik Berkhan <Enrik.Berkhan@akk.org> - * - * Some code taken from: - * National Semiconductor SCx200 Watchdog support - * Copyright (c) 2001,2002 Christer Weinigel <wingel@nano-system.com> - * - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/errno.h> -#include <linux/miscdevice.h> -#include <linux/platform_device.h> -#include <linux/watchdog.h> -#include <linux/fs.h> -#include <linux/ioport.h> -#include <linux/io.h> -#include <linux/uaccess.h> -#include <linux/clk.h> - -#include <asm/addrspace.h> -#include <asm/mach-ar7/ar7.h> - -#define LONGNAME "TI AR7 Watchdog Timer" - -MODULE_AUTHOR("Nicolas Thill <nico@openwrt.org>"); -MODULE_DESCRIPTION(LONGNAME); -MODULE_LICENSE("GPL"); - -static 
int margin = 60; -module_param(margin, int, 0); -MODULE_PARM_DESC(margin, "Watchdog margin in seconds"); - -static bool nowayout = WATCHDOG_NOWAYOUT; -module_param(nowayout, bool, 0); -MODULE_PARM_DESC(nowayout, "Disable watchdog shutdown on close"); - -#define READ_REG(x) readl((void __iomem *)&(x)) -#define WRITE_REG(x, v) writel((v), (void __iomem *)&(x)) - -struct ar7_wdt { - u32 kick_lock; - u32 kick; - u32 change_lock; - u32 change; - u32 disable_lock; - u32 disable; - u32 prescale_lock; - u32 prescale; -}; - -static unsigned long wdt_is_open; -static unsigned expect_close; -static DEFINE_SPINLOCK(wdt_lock); - -/* XXX currently fixed, allows max margin ~68.72 secs */ -#define prescale_value 0xffff - -/* Pointer to the remapped WDT IO space */ -static struct ar7_wdt *ar7_wdt; - -static struct clk *vbus_clk; - -static void ar7_wdt_kick(u32 value) -{ - WRITE_REG(ar7_wdt->kick_lock, 0x5555); - if ((READ_REG(ar7_wdt->kick_lock) & 3) == 1) { - WRITE_REG(ar7_wdt->kick_lock, 0xaaaa); - if ((READ_REG(ar7_wdt->kick_lock) & 3) == 3) { - WRITE_REG(ar7_wdt->kick, value); - return; - } - } - pr_err("failed to unlock WDT kick reg\n"); -} - -static void ar7_wdt_prescale(u32 value) -{ - WRITE_REG(ar7_wdt->prescale_lock, 0x5a5a); - if ((READ_REG(ar7_wdt->prescale_lock) & 3) == 1) { - WRITE_REG(ar7_wdt->prescale_lock, 0xa5a5); - if ((READ_REG(ar7_wdt->prescale_lock) & 3) == 3) { - WRITE_REG(ar7_wdt->prescale, value); - return; - } - } - pr_err("failed to unlock WDT prescale reg\n"); -} - -static void ar7_wdt_change(u32 value) -{ - WRITE_REG(ar7_wdt->change_lock, 0x6666); - if ((READ_REG(ar7_wdt->change_lock) & 3) == 1) { - WRITE_REG(ar7_wdt->change_lock, 0xbbbb); - if ((READ_REG(ar7_wdt->change_lock) & 3) == 3) { - WRITE_REG(ar7_wdt->change, value); - return; - } - } - pr_err("failed to unlock WDT change reg\n"); -} - -static void ar7_wdt_disable(u32 value) -{ - WRITE_REG(ar7_wdt->disable_lock, 0x7777); - if ((READ_REG(ar7_wdt->disable_lock) & 3) == 1) { - WRITE_REG(ar7_wdt->disable_lock, 0xcccc); - if ((READ_REG(ar7_wdt->disable_lock) & 3) == 2) { - WRITE_REG(ar7_wdt->disable_lock, 0xdddd); - if ((READ_REG(ar7_wdt->disable_lock) & 3) == 3) { - WRITE_REG(ar7_wdt->disable, value); - return; - } - } - } - pr_err("failed to unlock WDT disable reg\n"); -} - -static void ar7_wdt_update_margin(int new_margin) -{ - u32 change; - u32 vbus_rate; - - vbus_rate = clk_get_rate(vbus_clk); - change = new_margin * (vbus_rate / prescale_value); - if (change < 1) - change = 1; - if (change > 0xffff) - change = 0xffff; - ar7_wdt_change(change); - margin = change * prescale_value / vbus_rate; - pr_info("timer margin %d seconds (prescale %d, change %d, freq %d)\n", - margin, prescale_value, change, vbus_rate); -} - -static void ar7_wdt_enable_wdt(void) -{ - pr_debug("enabling watchdog timer\n"); - ar7_wdt_disable(1); - ar7_wdt_kick(1); -} - -static void ar7_wdt_disable_wdt(void) -{ - pr_debug("disabling watchdog timer\n"); - ar7_wdt_disable(0); -} - -static int ar7_wdt_open(struct inode *inode, struct file *file) -{ - /* only allow one at a time */ - if (test_and_set_bit(0, &wdt_is_open)) - return -EBUSY; - ar7_wdt_enable_wdt(); - expect_close = 0; - - return stream_open(inode, file); -} - -static int ar7_wdt_release(struct inode *inode, struct file *file) -{ - if (!expect_close) - pr_warn("watchdog device closed unexpectedly, will not disable the watchdog timer\n"); - else if (!nowayout) - ar7_wdt_disable_wdt(); - clear_bit(0, &wdt_is_open); - return 0; -} - -static ssize_t ar7_wdt_write(struct file *file, const char 
*data, - size_t len, loff_t *ppos) -{ - /* check for a magic close character */ - if (len) { - size_t i; - - spin_lock(&wdt_lock); - ar7_wdt_kick(1); - spin_unlock(&wdt_lock); - - expect_close = 0; - for (i = 0; i < len; ++i) { - char c; - if (get_user(c, data + i)) - return -EFAULT; - if (c == 'V') - expect_close = 1; - } - - } - return len; -} - -static long ar7_wdt_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) -{ - static const struct watchdog_info ident = { - .identity = LONGNAME, - .firmware_version = 1, - .options = (WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | - WDIOF_MAGICCLOSE), - }; - int new_margin; - - switch (cmd) { - case WDIOC_GETSUPPORT: - if (copy_to_user((struct watchdog_info *)arg, &ident, - sizeof(ident))) - return -EFAULT; - return 0; - case WDIOC_GETSTATUS: - case WDIOC_GETBOOTSTATUS: - if (put_user(0, (int *)arg)) - return -EFAULT; - return 0; - case WDIOC_KEEPALIVE: - ar7_wdt_kick(1); - return 0; - case WDIOC_SETTIMEOUT: - if (get_user(new_margin, (int *)arg)) - return -EFAULT; - if (new_margin < 1) - return -EINVAL; - - spin_lock(&wdt_lock); - ar7_wdt_update_margin(new_margin); - ar7_wdt_kick(1); - spin_unlock(&wdt_lock); - fallthrough; - case WDIOC_GETTIMEOUT: - if (put_user(margin, (int *)arg)) - return -EFAULT; - return 0; - default: - return -ENOTTY; - } -} - -static const struct file_operations ar7_wdt_fops = { - .owner = THIS_MODULE, - .write = ar7_wdt_write, - .unlocked_ioctl = ar7_wdt_ioctl, - .compat_ioctl = compat_ptr_ioctl, - .open = ar7_wdt_open, - .release = ar7_wdt_release, - .llseek = no_llseek, -}; - -static struct miscdevice ar7_wdt_miscdev = { - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &ar7_wdt_fops, -}; - -static int ar7_wdt_probe(struct platform_device *pdev) -{ - int rc; - - ar7_wdt = devm_platform_ioremap_resource_byname(pdev, "regs"); - if (IS_ERR(ar7_wdt)) - return PTR_ERR(ar7_wdt); - - vbus_clk = clk_get(NULL, "vbus"); - if (IS_ERR(vbus_clk)) { - pr_err("could not get vbus clock\n"); - return PTR_ERR(vbus_clk); - } - - ar7_wdt_disable_wdt(); - ar7_wdt_prescale(prescale_value); - ar7_wdt_update_margin(margin); - - rc = misc_register(&ar7_wdt_miscdev); - if (rc) { - pr_err("unable to register misc device\n"); - goto out; - } - return 0; - -out: - clk_put(vbus_clk); - vbus_clk = NULL; - return rc; -} - -static void ar7_wdt_remove(struct platform_device *pdev) -{ - misc_deregister(&ar7_wdt_miscdev); - clk_put(vbus_clk); - vbus_clk = NULL; -} - -static void ar7_wdt_shutdown(struct platform_device *pdev) -{ - if (!nowayout) - ar7_wdt_disable_wdt(); -} - -static struct platform_driver ar7_wdt_driver = { - .probe = ar7_wdt_probe, - .remove_new = ar7_wdt_remove, - .shutdown = ar7_wdt_shutdown, - .driver = { - .name = "ar7_wdt", - }, -}; - -module_platform_driver(ar7_wdt_driver); diff --git a/drivers/watchdog/aspeed_wdt.c b/drivers/watchdog/aspeed_wdt.c index b72a858bbac7..b4773a6aaf8c 100644 --- a/drivers/watchdog/aspeed_wdt.c +++ b/drivers/watchdog/aspeed_wdt.c @@ -79,6 +79,8 @@ MODULE_DEVICE_TABLE(of, aspeed_wdt_of_table); #define WDT_TIMEOUT_STATUS_BOOT_SECONDARY BIT(1) #define WDT_CLEAR_TIMEOUT_STATUS 0x14 #define WDT_CLEAR_TIMEOUT_AND_BOOT_CODE_SELECTION BIT(0) +#define WDT_RESET_MASK1 0x1c +#define WDT_RESET_MASK2 0x20 /* * WDT_RESET_WIDTH controls the characteristics of the external pulse (if @@ -402,6 +404,8 @@ static int aspeed_wdt_probe(struct platform_device *pdev) if ((of_device_is_compatible(np, "aspeed,ast2500-wdt")) || (of_device_is_compatible(np, "aspeed,ast2600-wdt"))) { + u32 reset_mask[2]; + size_t 
nrstmask = of_device_is_compatible(np, "aspeed,ast2600-wdt") ? 2 : 1; u32 reg = readl(wdt->base + WDT_RESET_WIDTH); reg &= wdt->cfg->ext_pulse_width_mask; @@ -419,6 +423,13 @@ static int aspeed_wdt_probe(struct platform_device *pdev) reg |= WDT_OPEN_DRAIN_MAGIC; writel(reg, wdt->base + WDT_RESET_WIDTH); + + ret = of_property_read_u32_array(np, "aspeed,reset-mask", reset_mask, nrstmask); + if (!ret) { + writel(reset_mask[0], wdt->base + WDT_RESET_MASK1); + if (nrstmask > 1) + writel(reset_mask[1], wdt->base + WDT_RESET_MASK2); + } } if (!of_property_read_u32(np, "aspeed,ext-pulse-duration", &duration)) { diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c index fed7be246442..b111b28acb94 100644 --- a/drivers/watchdog/at91sam9_wdt.c +++ b/drivers/watchdog/at91sam9_wdt.c @@ -348,25 +348,21 @@ static int __init at91wdt_probe(struct platform_device *pdev) if (IS_ERR(wdt->base)) return PTR_ERR(wdt->base); - wdt->sclk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(wdt->sclk)) - return PTR_ERR(wdt->sclk); - - err = clk_prepare_enable(wdt->sclk); - if (err) { + wdt->sclk = devm_clk_get_enabled(&pdev->dev, NULL); + if (IS_ERR(wdt->sclk)) { dev_err(&pdev->dev, "Could not enable slow clock\n"); - return err; + return PTR_ERR(wdt->sclk); } if (pdev->dev.of_node) { err = of_at91wdt_init(pdev->dev.of_node, wdt); if (err) - goto err_clk; + return err; } err = at91_wdt_init(pdev, wdt); if (err) - goto err_clk; + return err; platform_set_drvdata(pdev, wdt); @@ -374,11 +370,6 @@ static int __init at91wdt_probe(struct platform_device *pdev) wdt->wdd.timeout, wdt->nowayout); return 0; - -err_clk: - clk_disable_unprepare(wdt->sclk); - - return err; } static int __exit at91wdt_remove(struct platform_device *pdev) @@ -388,7 +379,6 @@ static int __exit at91wdt_remove(struct platform_device *pdev) pr_warn("I quit now, hardware will probably reboot!\n"); del_timer(&wdt->timer); - clk_disable_unprepare(wdt->sclk); return 0; } diff --git a/drivers/watchdog/ath79_wdt.c b/drivers/watchdog/ath79_wdt.c index b7b705060438..e5cc30622b12 100644 --- a/drivers/watchdog/ath79_wdt.c +++ b/drivers/watchdog/ath79_wdt.c @@ -257,19 +257,13 @@ static int ath79_wdt_probe(struct platform_device *pdev) if (IS_ERR(wdt_base)) return PTR_ERR(wdt_base); - wdt_clk = devm_clk_get(&pdev->dev, "wdt"); + wdt_clk = devm_clk_get_enabled(&pdev->dev, "wdt"); if (IS_ERR(wdt_clk)) return PTR_ERR(wdt_clk); - err = clk_prepare_enable(wdt_clk); - if (err) - return err; - wdt_freq = clk_get_rate(wdt_clk); - if (!wdt_freq) { - err = -EINVAL; - goto err_clk_disable; - } + if (!wdt_freq) + return -EINVAL; max_timeout = (0xfffffffful / wdt_freq); if (timeout < 1 || timeout > max_timeout) { @@ -286,20 +280,15 @@ static int ath79_wdt_probe(struct platform_device *pdev) if (err) { dev_err(&pdev->dev, "unable to register misc device, err=%d\n", err); - goto err_clk_disable; + return err; } return 0; - -err_clk_disable: - clk_disable_unprepare(wdt_clk); - return err; } static void ath79_wdt_remove(struct platform_device *pdev) { misc_deregister(&ath79_wdt_miscdev); - clk_disable_unprepare(wdt_clk); } static void ath79_wdt_shutdown(struct platform_device *pdev) diff --git a/drivers/watchdog/gpio_wdt.c b/drivers/watchdog/gpio_wdt.c index 0923201ce874..a7b814ea740b 100644 --- a/drivers/watchdog/gpio_wdt.c +++ b/drivers/watchdog/gpio_wdt.c @@ -5,12 +5,13 @@ * Author: 2013, Alexander Shiyan <shc_work@mail.ru> */ -#include <linux/err.h> #include <linux/delay.h> -#include <linux/module.h> +#include <linux/err.h> #include <linux/gpio/consumer.h> 
-#include <linux/of.h> +#include <linux/mod_devicetable.h> +#include <linux/module.h> #include <linux/platform_device.h> +#include <linux/property.h> #include <linux/watchdog.h> static bool nowayout = WATCHDOG_NOWAYOUT; @@ -106,7 +107,6 @@ static const struct watchdog_ops gpio_wdt_ops = { static int gpio_wdt_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct device_node *np = dev->of_node; struct gpio_wdt_priv *priv; enum gpiod_flags gflags; unsigned int hw_margin; @@ -119,7 +119,7 @@ static int gpio_wdt_probe(struct platform_device *pdev) platform_set_drvdata(pdev, priv); - ret = of_property_read_string(np, "hw_algo", &algo); + ret = device_property_read_string(dev, "hw_algo", &algo); if (ret) return ret; if (!strcmp(algo, "toggle")) { @@ -136,16 +136,14 @@ static int gpio_wdt_probe(struct platform_device *pdev) if (IS_ERR(priv->gpiod)) return PTR_ERR(priv->gpiod); - ret = of_property_read_u32(np, - "hw_margin_ms", &hw_margin); + ret = device_property_read_u32(dev, "hw_margin_ms", &hw_margin); if (ret) return ret; /* Disallow values lower than 2 and higher than 65535 ms */ if (hw_margin < 2 || hw_margin > 65535) return -EINVAL; - priv->always_running = of_property_read_bool(np, - "always-running"); + priv->always_running = device_property_read_bool(dev, "always-running"); watchdog_set_drvdata(&priv->wdd, priv); diff --git a/drivers/watchdog/imx7ulp_wdt.c b/drivers/watchdog/imx7ulp_wdt.c index c703586c6e5f..b21d7a74a42d 100644 --- a/drivers/watchdog/imx7ulp_wdt.c +++ b/drivers/watchdog/imx7ulp_wdt.c @@ -23,6 +23,7 @@ #define LPO_CLK_SHIFT 8 #define WDOG_CS_CLK (LPO_CLK << LPO_CLK_SHIFT) #define WDOG_CS_EN BIT(7) +#define WDOG_CS_INT_EN BIT(6) #define WDOG_CS_UPDATE BIT(5) #define WDOG_CS_WAIT BIT(1) #define WDOG_CS_STOP BIT(0) @@ -62,6 +63,7 @@ struct imx7ulp_wdt_device { void __iomem *base; struct clk *clk; bool post_rcs_wait; + bool ext_reset; const struct imx_wdt_hw_feature *hw; }; @@ -285,6 +287,9 @@ static int imx7ulp_wdt_init(struct imx7ulp_wdt_device *wdt, unsigned int timeout if (wdt->hw->prescaler_enable) val |= WDOG_CS_PRES; + if (wdt->ext_reset) + val |= WDOG_CS_INT_EN; + do { ret = _imx7ulp_wdt_init(wdt, timeout, val); toval = readl(wdt->base + WDOG_TOVAL); @@ -321,6 +326,9 @@ static int imx7ulp_wdt_probe(struct platform_device *pdev) return PTR_ERR(imx7ulp_wdt->clk); } + /* The WDOG may need to do external reset through dedicated pin */ + imx7ulp_wdt->ext_reset = of_property_read_bool(dev->of_node, "fsl,ext-reset-output"); + imx7ulp_wdt->post_rcs_wait = true; if (of_device_is_compatible(dev->of_node, "fsl,imx8ulp-wdt")) { diff --git a/drivers/watchdog/imx_sc_wdt.c b/drivers/watchdog/imx_sc_wdt.c index 8ac021748d16..e51fe1b78518 100644 --- a/drivers/watchdog/imx_sc_wdt.c +++ b/drivers/watchdog/imx_sc_wdt.c @@ -34,6 +34,7 @@ #define SC_IRQ_WDOG 1 #define SC_IRQ_GROUP_WDOG 1 +#define SC_TIMER_ERR_BUSY 10 static bool nowayout = WATCHDOG_NOWAYOUT; module_param(nowayout, bool, 0000); @@ -61,7 +62,9 @@ static int imx_sc_wdt_start(struct watchdog_device *wdog) arm_smccc_smc(IMX_SIP_TIMER, IMX_SIP_TIMER_START_WDOG, 0, 0, 0, 0, 0, 0, &res); - if (res.a0) + + /* Ignore if already enabled(SC_TIMER_ERR_BUSY) */ + if (res.a0 && res.a0 != SC_TIMER_ERR_BUSY) return -EACCES; arm_smccc_smc(IMX_SIP_TIMER, IMX_SIP_TIMER_SET_WDOG_ACT, diff --git a/drivers/watchdog/it87_wdt.c b/drivers/watchdog/it87_wdt.c index bb1122909396..e888b1bdd1f2 100644 --- a/drivers/watchdog/it87_wdt.c +++ b/drivers/watchdog/it87_wdt.c @@ -13,9 +13,9 @@ * http://www.ite.com.tw/ * * Support of 
the watchdog timers, which are available on - * IT8607, IT8620, IT8622, IT8625, IT8628, IT8655, IT8665, IT8686, - * IT8702, IT8712, IT8716, IT8718, IT8720, IT8721, IT8726, IT8728, - * IT8772, IT8783 and IT8784. + * IT8607, IT8613, IT8620, IT8622, IT8625, IT8628, IT8655, IT8665, + * IT8686, IT8702, IT8712, IT8716, IT8718, IT8720, IT8721, IT8726, + * IT8728, IT8772, IT8783 and IT8784. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -50,6 +50,7 @@ /* Chip Id numbers */ #define NO_DEV_ID 0xffff #define IT8607_ID 0x8607 +#define IT8613_ID 0x8613 #define IT8620_ID 0x8620 #define IT8622_ID 0x8622 #define IT8625_ID 0x8625 @@ -277,6 +278,7 @@ static int __init it87_wdt_init(void) max_units = 65535; break; case IT8607_ID: + case IT8613_ID: case IT8620_ID: case IT8622_ID: case IT8625_ID: diff --git a/drivers/watchdog/ixp4xx_wdt.c b/drivers/watchdog/ixp4xx_wdt.c index 607ce4b8df57..ec0c08652ec2 100644 --- a/drivers/watchdog/ixp4xx_wdt.c +++ b/drivers/watchdog/ixp4xx_wdt.c @@ -105,6 +105,25 @@ static const struct watchdog_ops ixp4xx_wdt_ops = { .owner = THIS_MODULE, }; +/* + * The A0 version of the IXP422 had a bug in the watchdog making + * it useless, but we still need to use it to restart the system + * as it is the only way, so in this special case we register a + * "dummy" watchdog that doesn't really work, but will support + * the restart operation. + */ +static int ixp4xx_wdt_dummy(struct watchdog_device *wdd) +{ + return 0; +} + +static const struct watchdog_ops ixp4xx_wdt_restart_only_ops = { + .start = ixp4xx_wdt_dummy, + .stop = ixp4xx_wdt_dummy, + .restart = ixp4xx_wdt_restart, + .owner = THIS_MODULE, +}; + static const struct watchdog_info ixp4xx_wdt_info = { .options = WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE @@ -114,14 +133,17 @@ static const struct watchdog_info ixp4xx_wdt_info = { static int ixp4xx_wdt_probe(struct platform_device *pdev) { + static const struct watchdog_ops *iwdt_ops; struct device *dev = &pdev->dev; struct ixp4xx_wdt *iwdt; struct clk *clk; int ret; if (!(read_cpuid_id() & 0xf) && !cpu_is_ixp46x()) { - dev_err(dev, "Rev. A0 IXP42x CPU detected - watchdog disabled\n"); - return -ENODEV; + dev_info(dev, "Rev.
A0 IXP42x CPU detected - only restart supported\n"); + iwdt_ops = &ixp4xx_wdt_restart_only_ops; + } else { + iwdt_ops = &ixp4xx_wdt_ops; } iwdt = devm_kzalloc(dev, sizeof(*iwdt), GFP_KERNEL); @@ -141,7 +163,7 @@ static int ixp4xx_wdt_probe(struct platform_device *pdev) iwdt->rate = IXP4XX_TIMER_FREQ; iwdt->wdd.info = &ixp4xx_wdt_info; - iwdt->wdd.ops = &ixp4xx_wdt_ops; + iwdt->wdd.ops = iwdt_ops; iwdt->wdd.min_timeout = 1; iwdt->wdd.max_timeout = U32_MAX / iwdt->rate; iwdt->wdd.parent = dev; diff --git a/drivers/watchdog/marvell_gti_wdt.c b/drivers/watchdog/marvell_gti_wdt.c index d7eb8286e11e..098bb141a521 100644 --- a/drivers/watchdog/marvell_gti_wdt.c +++ b/drivers/watchdog/marvell_gti_wdt.c @@ -8,8 +8,8 @@ #include <linux/interrupt.h> #include <linux/io.h> #include <linux/module.h> -#include <linux/of_platform.h> #include <linux/platform_device.h> +#include <linux/of.h> #include <linux/watchdog.h> /* @@ -190,6 +190,13 @@ static int gti_wdt_set_pretimeout(struct watchdog_device *wdev, struct gti_wdt_priv *priv = watchdog_get_drvdata(wdev); struct watchdog_device *wdog_dev = &priv->wdev; + if (!timeout) { + /* Disable Interrupt */ + writeq(GTI_CWD_INT_ENA_CLR_VAL(priv->wdt_timer_idx), + priv->base + GTI_CWD_INT_ENA_CLR); + return 0; + } + /* pretimeout should 1/3 of max_timeout */ if (timeout * 3 <= wdog_dev->max_timeout) return gti_wdt_settimeout(wdev, timeout * 3); @@ -271,7 +278,7 @@ static int gti_wdt_probe(struct platform_device *pdev) &wdt_idx); if (!err) { if (wdt_idx >= priv->data->gti_num_timers) - return dev_err_probe(&pdev->dev, err, + return dev_err_probe(&pdev->dev, -EINVAL, "GTI wdog timer index not valid"); priv->wdt_timer_idx = wdt_idx; @@ -292,6 +299,7 @@ static int gti_wdt_probe(struct platform_device *pdev) /* Maximum timeout is 3 times the pretimeout */ wdog_dev->max_timeout = max_pretimeout * 3; + wdog_dev->max_hw_heartbeat_ms = max_pretimeout * 1000; /* Minimum first timeout (pretimeout) is 1, so min_timeout as 3 */ wdog_dev->min_timeout = 3; wdog_dev->timeout = wdog_dev->pretimeout; @@ -308,7 +316,7 @@ static int gti_wdt_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) - return dev_err_probe(&pdev->dev, irq, "IRQ resource not found\n"); + return irq; err = devm_request_irq(dev, irq, gti_wdt_interrupt, 0, pdev->name, &priv->wdev); diff --git a/drivers/watchdog/mlx_wdt.c b/drivers/watchdog/mlx_wdt.c index 9c5b6616fc87..667e2c5b3431 100644 --- a/drivers/watchdog/mlx_wdt.c +++ b/drivers/watchdog/mlx_wdt.c @@ -39,6 +39,7 @@ * @tleft_idx: index for direct access to time left register; * @ping_idx: index for direct access to ping register; * @reset_idx: index for direct access to reset cause register; + * @regmap_val_sz: size of value in register map; * @wd_type: watchdog HW type; */ struct mlxreg_wdt { diff --git a/drivers/watchdog/of_xilinx_wdt.c b/drivers/watchdog/of_xilinx_wdt.c index 05657dc1d36a..352853e6fe71 100644 --- a/drivers/watchdog/of_xilinx_wdt.c +++ b/drivers/watchdog/of_xilinx_wdt.c @@ -187,7 +187,7 @@ static int xwdt_probe(struct platform_device *pdev) watchdog_set_nowayout(xilinx_wdt_wdd, enable_once); - xdev->clk = devm_clk_get_enabled(dev, NULL); + xdev->clk = devm_clk_get_prepared(dev, NULL); if (IS_ERR(xdev->clk)) { if (PTR_ERR(xdev->clk) != -ENOENT) return PTR_ERR(xdev->clk); @@ -218,18 +218,25 @@ static int xwdt_probe(struct platform_device *pdev) spin_lock_init(&xdev->spinlock); watchdog_set_drvdata(xilinx_wdt_wdd, xdev); + rc = clk_enable(xdev->clk); + if (rc) { + dev_err(dev, "unable to enable clock\n"); + return 
rc; + } + rc = xwdt_selftest(xdev); if (rc == XWT_TIMER_FAILED) { dev_err(dev, "SelfTest routine error\n"); + clk_disable(xdev->clk); return rc; } + clk_disable(xdev->clk); + rc = devm_watchdog_register_device(dev, xilinx_wdt_wdd); if (rc) return rc; - clk_disable(xdev->clk); - dev_info(dev, "Xilinx Watchdog Timer with timeout %ds\n", xilinx_wdt_wdd->timeout); diff --git a/drivers/watchdog/sbsa_gwdt.c b/drivers/watchdog/sbsa_gwdt.c index 421ebcda62e6..5f23913ce3b4 100644 --- a/drivers/watchdog/sbsa_gwdt.c +++ b/drivers/watchdog/sbsa_gwdt.c @@ -152,14 +152,14 @@ static int sbsa_gwdt_set_timeout(struct watchdog_device *wdd, timeout = clamp_t(unsigned int, timeout, 1, wdd->max_hw_heartbeat_ms / 1000); if (action) - sbsa_gwdt_reg_write(gwdt->clk * timeout, gwdt); + sbsa_gwdt_reg_write((u64)gwdt->clk * timeout, gwdt); else /* * In the single stage mode, The first signal (WS0) is ignored, * the timeout is (WOR * 2), so the WOR should be configured * to half value of timeout. */ - sbsa_gwdt_reg_write(gwdt->clk / 2 * timeout, gwdt); + sbsa_gwdt_reg_write(((u64)gwdt->clk / 2) * timeout, gwdt); return 0; } diff --git a/drivers/watchdog/st_lpc_wdt.c b/drivers/watchdog/st_lpc_wdt.c index d2aa43c00221..4c5b8d98a4f3 100644 --- a/drivers/watchdog/st_lpc_wdt.c +++ b/drivers/watchdog/st_lpc_wdt.c @@ -15,7 +15,6 @@ #include <linux/mfd/syscon.h> #include <linux/module.h> #include <linux/of.h> -#include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/regmap.h> #include <linux/watchdog.h> @@ -42,7 +41,7 @@ struct st_wdog { void __iomem *base; struct device *dev; struct regmap *regmap; - struct st_wdog_syscfg *syscfg; + const struct st_wdog_syscfg *syscfg; struct clk *clk; unsigned long clkrate; bool warm_reset; @@ -150,7 +149,6 @@ static void st_clk_disable_unprepare(void *data) static int st_wdog_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - const struct of_device_id *match; struct device_node *np = dev->of_node; struct st_wdog *st_wdog; struct regmap *regmap; @@ -173,12 +171,7 @@ static int st_wdog_probe(struct platform_device *pdev) if (!st_wdog) return -ENOMEM; - match = of_match_device(st_wdog_match, dev); - if (!match) { - dev_err(dev, "Couldn't match device\n"); - return -ENODEV; - } - st_wdog->syscfg = (struct st_wdog_syscfg *)match->data; + st_wdog->syscfg = (struct st_wdog_syscfg *)device_get_match_data(dev); base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(base)) diff --git a/drivers/watchdog/sunplus_wdt.c b/drivers/watchdog/sunplus_wdt.c index e2d8c532bcb1..9d3ca848e8b6 100644 --- a/drivers/watchdog/sunplus_wdt.c +++ b/drivers/watchdog/sunplus_wdt.c @@ -136,11 +136,6 @@ static const struct watchdog_ops sp_wdt_ops = { .restart = sp_wdt_restart, }; -static void sp_clk_disable_unprepare(void *data) -{ - clk_disable_unprepare(data); -} - static void sp_reset_control_assert(void *data) { reset_control_assert(data); @@ -156,17 +151,9 @@ static int sp_wdt_probe(struct platform_device *pdev) if (!priv) return -ENOMEM; - priv->clk = devm_clk_get(dev, NULL); + priv->clk = devm_clk_get_enabled(dev, NULL); if (IS_ERR(priv->clk)) - return dev_err_probe(dev, PTR_ERR(priv->clk), "Failed to get clock\n"); - - ret = clk_prepare_enable(priv->clk); - if (ret) - return dev_err_probe(dev, ret, "Failed to enable clock\n"); - - ret = devm_add_action_or_reset(dev, sp_clk_disable_unprepare, priv->clk); - if (ret) - return ret; + return dev_err_probe(dev, PTR_ERR(priv->clk), "Failed to enable clock\n"); /* The timer and watchdog shared the STC reset */ priv->rstc 
= devm_reset_control_get_shared(dev, NULL); diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c index 0ba99bed59fc..650fdc7996e1 100644 --- a/drivers/watchdog/wdat_wdt.c +++ b/drivers/watchdog/wdat_wdt.c @@ -269,7 +269,7 @@ static int wdat_wdt_stop(struct watchdog_device *wdd) static int wdat_wdt_ping(struct watchdog_device *wdd) { - return wdat_wdt_run_action(to_wdat_wdt(wdd), ACPI_WDAT_RESET, 0, NULL); + return wdat_wdt_run_action(to_wdat_wdt(wdd), ACPI_WDAT_RESET, wdd->timeout, NULL); } static int wdat_wdt_set_timeout(struct watchdog_device *wdd, diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 2fe4a5832fcf..d6b9758ee23d 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -568,6 +568,7 @@ static struct dentry *affs_fh_to_parent(struct super_block *sb, struct fid *fid, } const struct export_operations affs_export_ops = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = affs_fh_to_dentry, .fh_to_parent = affs_fh_to_parent, .get_parent = affs_get_parent, diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig index df13a4f9a6e3..c08c2c7d6fbb 100644 --- a/fs/bcachefs/Kconfig +++ b/fs/bcachefs/Kconfig @@ -24,7 +24,6 @@ config BCACHEFS_FS select XXHASH select SRCU select SYMBOLIC_ERRNAME - select MEAN_AND_VARIANCE help The bcachefs filesystem - a modern, copy on write filesystem, with support for multiple devices, compression, checksumming, etc. @@ -42,7 +41,6 @@ config BCACHEFS_POSIX_ACL config BCACHEFS_DEBUG_TRANSACTIONS bool "bcachefs runtime info" depends on BCACHEFS_FS - default y help This makes the list of running btree transactions available in debugfs. @@ -78,7 +76,7 @@ config BCACHEFS_NO_LATENCY_ACCT config MEAN_AND_VARIANCE_UNIT_TEST tristate "mean_and_variance unit tests" if !KUNIT_ALL_TESTS depends on KUNIT - select MEAN_AND_VARIANCE + depends on BCACHEFS_FS default KUNIT_ALL_TESTS help This option enables the kunit tests for mean_and_variance module. 
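The watchdog clock conversions above (at91sam9_wdt, ath79_wdt, sunplus_wdt) all apply the same devres pattern: the devm_clk_get() + clk_prepare_enable() pair and its manual error-path/remove-path cleanup collapse into a single devm_clk_get_enabled() call, which keeps the clock prepared and enabled for as long as the device is bound. A minimal probe sketch of the pattern, using a hypothetical foo_wdt driver rather than any of the drivers touched here:

#include <linux/clk.h>
#include <linux/dev_printk.h>
#include <linux/err.h>
#include <linux/platform_device.h>

static int foo_wdt_probe(struct platform_device *pdev)
{
	struct clk *clk;
	unsigned long rate;

	/*
	 * Get, prepare and enable the clock in one call; devres disables,
	 * unprepares and puts it automatically on probe failure and on
	 * unbind, so no error labels and no clk_disable_unprepare() in a
	 * remove callback are needed.
	 */
	clk = devm_clk_get_enabled(&pdev->dev, NULL);
	if (IS_ERR(clk))
		return dev_err_probe(&pdev->dev, PTR_ERR(clk),
				     "Failed to enable clock\n");

	rate = clk_get_rate(clk);
	if (!rate)	/* the rate is needed to compute timeouts */
		return -EINVAL;

	return 0;
}

of_xilinx_wdt above uses the related devm_clk_get_prepared() variant instead, since it only needs the clock enabled around register access (the self-test), not for the whole time the driver is bound.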
diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile index 0749731b9072..45b64f89258c 100644 --- a/fs/bcachefs/Makefile +++ b/fs/bcachefs/Makefile @@ -70,6 +70,7 @@ bcachefs-y := \ reflink.o \ replicas.o \ sb-clean.o \ + sb-errors.o \ sb-members.o \ siphash.o \ six.o \ diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 2d516207e223..1fec0e67891f 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -192,123 +192,109 @@ static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a) return DIV_ROUND_UP(bytes, sizeof(u64)); } -int bch2_alloc_v1_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_alloc_v1_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); + int ret = 0; /* allow for unknown fields */ - if (bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v)) { - prt_printf(err, "incorrect value size (%zu < %u)", - bkey_val_u64s(a.k), bch_alloc_v1_val_u64s(a.v)); - return -BCH_ERR_invalid_bkey; - } - - return 0; + bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), c, err, + alloc_v1_val_size_bad, + "incorrect value size (%zu < %u)", + bkey_val_u64s(a.k), bch_alloc_v1_val_u64s(a.v)); +fsck_err: + return ret; } -int bch2_alloc_v2_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_alloc_v2_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_alloc_unpacked u; + int ret = 0; - if (bch2_alloc_unpack_v2(&u, k)) { - prt_printf(err, "unpack error"); - return -BCH_ERR_invalid_bkey; - } - - return 0; + bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), c, err, + alloc_v2_unpack_error, + "unpack error"); +fsck_err: + return ret; } -int bch2_alloc_v3_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_alloc_v3_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_alloc_unpacked u; + int ret = 0; - if (bch2_alloc_unpack_v3(&u, k)) { - prt_printf(err, "unpack error"); - return -BCH_ERR_invalid_bkey; - } - - return 0; + bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), c, err, + alloc_v2_unpack_error, + "unpack error"); +fsck_err: + return ret; } -int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k); + int ret = 0; - if (alloc_v4_u64s(a.v) > bkey_val_u64s(k.k)) { - prt_printf(err, "bad val size (%u > %zu)", - alloc_v4_u64s(a.v), bkey_val_u64s(k.k)); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(alloc_v4_u64s(a.v) > bkey_val_u64s(k.k), c, err, + alloc_v4_val_size_bad, + "bad val size (%u > %zu)", + alloc_v4_u64s(a.v), bkey_val_u64s(k.k)); - if (!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) && - BCH_ALLOC_V4_NR_BACKPOINTERS(a.v)) { - prt_printf(err, "invalid backpointers_start"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) && + BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), c, err, + alloc_v4_backpointers_start_bad, + "invalid backpointers_start"); - if (alloc_data_type(*a.v, a.v->data_type) != a.v->data_type) { - prt_printf(err, "invalid data type (got %u should be %u)", - a.v->data_type, alloc_data_type(*a.v, a.v->data_type)); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, c, err, + alloc_key_data_type_bad, + "invalid 
data type (got %u should be %u)", + a.v->data_type, alloc_data_type(*a.v, a.v->data_type)); switch (a.v->data_type) { case BCH_DATA_free: case BCH_DATA_need_gc_gens: case BCH_DATA_need_discard: - if (a.v->dirty_sectors || - a.v->cached_sectors || - a.v->stripe) { - prt_printf(err, "empty data type free but have data"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(a.v->dirty_sectors || + a.v->cached_sectors || + a.v->stripe, c, err, + alloc_key_empty_but_have_data, + "empty data type free but have data"); break; case BCH_DATA_sb: case BCH_DATA_journal: case BCH_DATA_btree: case BCH_DATA_user: case BCH_DATA_parity: - if (!a.v->dirty_sectors) { - prt_printf(err, "data_type %s but dirty_sectors==0", - bch2_data_types[a.v->data_type]); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(!a.v->dirty_sectors, c, err, + alloc_key_dirty_sectors_0, + "data_type %s but dirty_sectors==0", + bch2_data_types[a.v->data_type]); break; case BCH_DATA_cached: - if (!a.v->cached_sectors || - a.v->dirty_sectors || - a.v->stripe) { - prt_printf(err, "data type inconsistency"); - return -BCH_ERR_invalid_bkey; - } - - if (!a.v->io_time[READ] && - c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs) { - prt_printf(err, "cached bucket with read_time == 0"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(!a.v->cached_sectors || + a.v->dirty_sectors || + a.v->stripe, c, err, + alloc_key_cached_inconsistency, + "data type inconsistency"); + + bkey_fsck_err_on(!a.v->io_time[READ] && + c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs, + c, err, + alloc_key_cached_but_read_time_zero, + "cached bucket with read_time == 0"); break; case BCH_DATA_stripe: break; } - - return 0; -} - -static inline u64 swab40(u64 x) -{ - return (((x & 0x00000000ffULL) << 32)| - ((x & 0x000000ff00ULL) << 16)| - ((x & 0x0000ff0000ULL) >> 0)| - ((x & 0x00ff000000ULL) >> 16)| - ((x & 0xff00000000ULL) >> 32)); +fsck_err: + return ret; } void bch2_alloc_v4_swab(struct bkey_s k) @@ -324,6 +310,7 @@ void bch2_alloc_v4_swab(struct bkey_s k) a->io_time[1] = swab64(a->io_time[1]); a->stripe = swab32(a->stripe); a->nr_external_backpointers = swab32(a->nr_external_backpointers); + a->fragmentation_lru = swab64(a->fragmentation_lru); bps = alloc_v4_backpointers(a); for (bp = bps; bp < bps + BCH_ALLOC_V4_NR_BACKPOINTERS(a); bp++) { @@ -521,17 +508,18 @@ static unsigned alloc_gen(struct bkey_s_c k, unsigned offset) : 0; } -int bch2_bucket_gens_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_bucket_gens_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens)) { - prt_printf(err, "bad val size (%zu != %zu)", - bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens)); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; - return 0; + bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), c, err, + bucket_gens_val_size_bad, + "bad val size (%zu != %zu)", + bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens)); +fsck_err: + return ret; } void bch2_bucket_gens_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) @@ -727,7 +715,7 @@ static int bch2_bucket_do_index(struct btree_trans *trans, "incorrect key when %s %s:%llu:%llu:0 (got %s should be %s)\n" " for %s", set ? 
"setting" : "clearing", - bch2_btree_ids[btree], + bch2_btree_id_str(btree), iter.pos.inode, iter.pos.offset, bch2_bkey_types[old.k->type], @@ -986,6 +974,7 @@ int bch2_check_alloc_key(struct btree_trans *trans, int ret; if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_k.k->p), c, + alloc_key_to_missing_dev_bucket, "alloc key for invalid device:bucket %llu:%llu", alloc_k.k->p.inode, alloc_k.k->p.offset)) return bch2_btree_delete_at(trans, alloc_iter, 0); @@ -1005,7 +994,8 @@ int bch2_check_alloc_key(struct btree_trans *trans, if (k.k->type != discard_key_type && (c->opts.reconstruct_alloc || - fsck_err(c, "incorrect key in need_discard btree (got %s should be %s)\n" + fsck_err(c, need_discard_key_wrong, + "incorrect key in need_discard btree (got %s should be %s)\n" " %s", bch2_bkey_types[k.k->type], bch2_bkey_types[discard_key_type], @@ -1035,7 +1025,8 @@ int bch2_check_alloc_key(struct btree_trans *trans, if (k.k->type != freespace_key_type && (c->opts.reconstruct_alloc || - fsck_err(c, "incorrect key in freespace btree (got %s should be %s)\n" + fsck_err(c, freespace_key_wrong, + "incorrect key in freespace btree (got %s should be %s)\n" " %s", bch2_bkey_types[k.k->type], bch2_bkey_types[freespace_key_type], @@ -1066,7 +1057,8 @@ int bch2_check_alloc_key(struct btree_trans *trans, if (a->gen != alloc_gen(k, gens_offset) && (c->opts.reconstruct_alloc || - fsck_err(c, "incorrect gen in bucket_gens btree (got %u should be %u)\n" + fsck_err(c, bucket_gens_key_wrong, + "incorrect gen in bucket_gens btree (got %u should be %u)\n" " %s", alloc_gen(k, gens_offset), a->gen, (printbuf_reset(&buf), @@ -1124,7 +1116,8 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans, if (k.k->type != KEY_TYPE_set && (c->opts.reconstruct_alloc || - fsck_err(c, "hole in alloc btree missing in freespace btree\n" + fsck_err(c, freespace_hole_missing, + "hole in alloc btree missing in freespace btree\n" " device %llu buckets %llu-%llu", freespace_iter->pos.inode, freespace_iter->pos.offset, @@ -1187,6 +1180,7 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans, for (i = gens_offset; i < gens_end_offset; i++) { if (fsck_err_on(g.v.gens[i], c, + bucket_gens_hole_wrong, "hole in alloc btree at %llu:%llu with nonzero gen in bucket_gens btree (%u)", bucket_gens_pos_to_alloc(k.k->p, i).inode, bucket_gens_pos_to_alloc(k.k->p, i).offset, @@ -1244,8 +1238,9 @@ static noinline_for_stack int __bch2_check_discard_freespace_key(struct btree_tr return ret; if (fsck_err_on(!bch2_dev_bucket_exists(c, pos), c, + need_discard_freespace_key_to_invalid_dev_bucket, "entry in %s btree for nonexistant dev:bucket %llu:%llu", - bch2_btree_ids[iter->btree_id], pos.inode, pos.offset)) + bch2_btree_id_str(iter->btree_id), pos.inode, pos.offset)) goto delete; a = bch2_alloc_to_v4(alloc_k, &a_convert); @@ -1253,9 +1248,10 @@ static noinline_for_stack int __bch2_check_discard_freespace_key(struct btree_tr if (fsck_err_on(a->data_type != state || (state == BCH_DATA_free && genbits != alloc_freespace_genbits(*a)), c, + need_discard_freespace_key_bad, "%s\n incorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)", (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf), - bch2_btree_ids[iter->btree_id], + bch2_btree_id_str(iter->btree_id), iter->pos.inode, iter->pos.offset, a->data_type == state, @@ -1320,6 +1316,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans, dev_exists = bch2_dev_exists2(c, k.k->p.inode); if (!dev_exists) { if (fsck_err_on(!dev_exists, c, + bucket_gens_to_invalid_dev, 
"bucket_gens key for invalid device:\n %s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { ret = bch2_btree_delete_at(trans, iter, 0); @@ -1330,6 +1327,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans, ca = bch_dev_bkey_exists(c, k.k->p.inode); if (fsck_err_on(end <= ca->mi.first_bucket || start >= ca->mi.nbuckets, c, + bucket_gens_to_invalid_buckets, "bucket_gens key for invalid buckets:\n %s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { ret = bch2_btree_delete_at(trans, iter, 0); @@ -1338,6 +1336,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans, for (b = start; b < ca->mi.first_bucket; b++) if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], c, + bucket_gens_nonzero_for_invalid_buckets, "bucket_gens key has nonzero gen for invalid bucket")) { g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0; need_update = true; @@ -1345,6 +1344,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans, for (b = ca->mi.nbuckets; b < end; b++) if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], c, + bucket_gens_nonzero_for_invalid_buckets, "bucket_gens key has nonzero gen for invalid bucket")) { g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0; need_update = true; @@ -1495,11 +1495,13 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, return ret; if (fsck_err_on(!a->io_time[READ], c, + alloc_key_cached_but_read_time_zero, "cached bucket with read_time 0\n" " %s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)) || fsck_err_on(lru_k.k->type != KEY_TYPE_set, c, + alloc_key_to_missing_lru_entry, "missing lru entry\n" " %s", (printbuf_reset(&buf), @@ -2075,6 +2077,17 @@ void bch2_recalc_capacity(struct bch_fs *c) closure_wake_up(&c->freelist_wait); } +u64 bch2_min_rw_member_capacity(struct bch_fs *c) +{ + struct bch_dev *ca; + unsigned i; + u64 ret = U64_MAX; + + for_each_rw_member(ca, c, i) + ret = min(ret, ca->mi.nbuckets * ca->mi.bucket_size); + return ret; +} + static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca) { struct open_bucket *ob; diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 97042067d2a9..73faf99a222a 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -149,13 +149,13 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int); -int bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_alloc_v1_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); -int bch2_alloc_v2_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_alloc_v2_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); -int bch2_alloc_v3_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_alloc_v3_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); -int bch2_alloc_v4_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_alloc_v4_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_alloc_v4_swab(struct bkey_s); void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -193,7 +193,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); .min_val_size = 48, \ }) -int bch2_bucket_gens_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_bucket_gens_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void 
bch2_bucket_gens_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -249,6 +249,7 @@ int bch2_dev_freespace_init(struct bch_fs *, struct bch_dev *, u64, u64); int bch2_fs_freespace_init(struct bch_fs *); void bch2_recalc_capacity(struct bch_fs *); +u64 bch2_min_rw_member_capacity(struct bch_fs *); void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *); void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *); diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 3bc4abd3d7d5..b85c7765272f 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -399,12 +399,23 @@ bch2_bucket_alloc_early(struct btree_trans *trans, struct bucket_alloc_state *s, struct closure *cl) { - struct btree_iter iter; - struct bkey_s_c k; + struct btree_iter iter, citer; + struct bkey_s_c k, ck; struct open_bucket *ob = NULL; - u64 alloc_start = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx); - u64 alloc_cursor = max(alloc_start, READ_ONCE(ca->alloc_cursor)); + u64 first_bucket = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx); + u64 alloc_start = max(first_bucket, READ_ONCE(ca->alloc_cursor)); + u64 alloc_cursor = alloc_start; int ret; + + /* + * Scan with an uncached iterator to avoid polluting the key cache. An + * uncached iter will return a cached key if one exists, but if not + * there is no other underlying protection for the associated key cache + * slot. To avoid racing bucket allocations, look up the cached key slot + * of any likely allocation candidate before attempting to proceed with + * the allocation. This provides proper exclusion on the associated + * bucket. + */ again: for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, alloc_cursor), BTREE_ITER_SLOTS, k, ret) { @@ -419,25 +430,38 @@ again: continue; a = bch2_alloc_to_v4(k, &a_convert); - if (a->data_type != BCH_DATA_free) continue; + /* now check the cached key to serialize concurrent allocs of the bucket */ + ck = bch2_bkey_get_iter(trans, &citer, BTREE_ID_alloc, k.k->p, BTREE_ITER_CACHED); + ret = bkey_err(ck); + if (ret) + break; + + a = bch2_alloc_to_v4(ck, &a_convert); + if (a->data_type != BCH_DATA_free) + goto next; + s->buckets_seen++; ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, watermark, a, s, cl); +next: + citer.path->preserve = false; + bch2_trans_iter_exit(trans, &citer); if (ob) break; } bch2_trans_iter_exit(trans, &iter); + alloc_cursor = iter.pos.offset; ca->alloc_cursor = alloc_cursor; if (!ob && ret) ob = ERR_PTR(ret); - if (!ob && alloc_cursor > alloc_start) { - alloc_cursor = alloc_start; + if (!ob && alloc_start > first_bucket) { + alloc_cursor = alloc_start = first_bucket; goto again; } diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index cc856150a948..ef02c9bb0354 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -5,6 +5,7 @@ #include "backpointers.h" #include "btree_cache.h" #include "btree_update.h" +#include "btree_update_interior.h" #include "btree_write_buffer.h" #include "error.h" @@ -37,25 +38,26 @@ static bool extent_matches_bp(struct bch_fs *c, return false; } -int bch2_backpointer_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); struct bpos bucket = bp_pos_to_bucket(c, bp.k->p); + int ret = 0; - if (!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, 
bp.v->bucket_offset))) { - prt_str(err, "backpointer at wrong pos"); - return -BCH_ERR_invalid_bkey; - } - - return 0; + bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)), + c, err, + backpointer_pos_wrong, + "backpointer at wrong pos"); +fsck_err: + return ret; } void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp) { prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=", - bch2_btree_ids[bp->btree_id], + bch2_btree_id_str(bp->btree_id), bp->level, (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT), (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), @@ -76,7 +78,7 @@ void bch2_backpointer_swab(struct bkey_s k) { struct bkey_s_backpointer bp = bkey_s_to_backpointer(k); - bp.v->bucket_offset = swab32(bp.v->bucket_offset); + bp.v->bucket_offset = swab40(bp.v->bucket_offset); bp.v->bucket_len = swab32(bp.v->bucket_len); bch2_bpos_swab(&bp.v->pos); } @@ -219,18 +221,22 @@ out: static void backpointer_not_found(struct btree_trans *trans, struct bpos bp_pos, struct bch_backpointer bp, - struct bkey_s_c k, - const char *thing_it_points_to) + struct bkey_s_c k) { struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; struct bpos bucket = bp_pos_to_bucket(c, bp_pos); + /* + * If we're using the btree write buffer, the backpointer we were + * looking at may have already been deleted - failure to find what it + * pointed to is not an error: + */ if (likely(!bch2_backpointers_no_use_write_buffer)) return; prt_printf(&buf, "backpointer doesn't match %s it points to:\n ", - thing_it_points_to); + bp.level ? "btree node" : "extent"); prt_printf(&buf, "bucket: "); bch2_bpos_to_text(&buf, bucket); prt_printf(&buf, "\n "); @@ -256,56 +262,37 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, struct bch_backpointer bp, unsigned iter_flags) { - struct bch_fs *c = trans->c; - struct btree_root *r = bch2_btree_id_root(c, bp.btree_id); - struct bpos bucket = bp_pos_to_bucket(c, bp_pos); - struct bkey_s_c k; - - bch2_trans_node_iter_init(trans, iter, - bp.btree_id, - bp.pos, - 0, - min(bp.level, r->level), - iter_flags); - k = bch2_btree_iter_peek_slot(iter); - if (bkey_err(k)) { - bch2_trans_iter_exit(trans, iter); - return k; - } - - if (bp.level == r->level + 1) - k = bkey_i_to_s_c(&r->key); - - if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp)) - return k; - - bch2_trans_iter_exit(trans, iter); + if (likely(!bp.level)) { + struct bch_fs *c = trans->c; + struct bpos bucket = bp_pos_to_bucket(c, bp_pos); + struct bkey_s_c k; + + bch2_trans_node_iter_init(trans, iter, + bp.btree_id, + bp.pos, + 0, 0, + iter_flags); + k = bch2_btree_iter_peek_slot(iter); + if (bkey_err(k)) { + bch2_trans_iter_exit(trans, iter); + return k; + } - if (unlikely(bch2_backpointers_no_use_write_buffer)) { - if (bp.level) { - struct btree *b; + if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp)) + return k; - /* - * If a backpointer for a btree node wasn't found, it may be - * because it was overwritten by a new btree node that hasn't - * been written out yet - backpointer_get_node() checks for - * this: - */ - b = bch2_backpointer_get_node(trans, iter, bp_pos, bp); - if (!IS_ERR_OR_NULL(b)) - return bkey_i_to_s_c(&b->key); + bch2_trans_iter_exit(trans, iter); + backpointer_not_found(trans, bp_pos, bp, k); + return bkey_s_c_null; + } else { + struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp); + if (IS_ERR_OR_NULL(b)) { bch2_trans_iter_exit(trans, iter); - 
- if (IS_ERR(b)) - return bkey_s_c_err(PTR_ERR(b)); - return bkey_s_c_null; + return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null; } - - backpointer_not_found(trans, bp_pos, bp, k, "extent"); + return bkey_i_to_s_c(&b->key); } - - return bkey_s_c_null; } struct btree *bch2_backpointer_get_node(struct btree_trans *trans, @@ -329,6 +316,8 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans, if (IS_ERR(b)) goto err; + BUG_ON(b->c.level != bp.level - 1); + if (b && extent_matches_bp(c, bp.btree_id, bp.level, bkey_i_to_s_c(&b->key), bucket, bp)) @@ -337,8 +326,7 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans, if (b && btree_node_will_make_reachable(b)) { b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); } else { - backpointer_not_found(trans, bp_pos, bp, - bkey_i_to_s_c(&b->key), "btree node"); + backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key)); b = NULL; } err: @@ -356,6 +344,7 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_ int ret = 0; if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c, + backpointer_to_missing_device, "backpointer for missing device:\n%s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { ret = bch2_btree_delete_at(trans, bp_iter, 0); @@ -369,6 +358,7 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_ goto out; if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c, + backpointer_to_missing_alloc, "backpointer for nonexistent alloc key: %llu:%llu:0\n%s", alloc_iter.pos.inode, alloc_iter.pos.offset, (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { @@ -453,14 +443,14 @@ fsck_err: return ret; missing: prt_printf(&buf, "missing backpointer for btree=%s l=%u ", - bch2_btree_ids[bp.btree_id], bp.level); + bch2_btree_id_str(bp.btree_id), bp.level); bch2_bkey_val_to_text(&buf, c, orig_k); prt_printf(&buf, "\nbp pos "); bch2_bpos_to_text(&buf, bp_iter.pos); if (c->sb.version_upgrade_complete < bcachefs_metadata_version_backpointers || c->opts.reconstruct_alloc || - fsck_err(c, "%s", buf.buf)) + fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf)) ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true); goto out; @@ -793,7 +783,9 @@ static int check_one_backpointer(struct btree_trans *trans, } if (fsck_err_on(!k.k, c, - "backpointer for missing extent\n %s", + backpointer_to_missing_ptr, + "backpointer for missing %s\n %s", + bp.v->level ? 
"btree node" : "extent", (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p); goto out; diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h index 547e0617602a..ab866feeaf66 100644 --- a/fs/bcachefs/backpointers.h +++ b/fs/bcachefs/backpointers.h @@ -7,7 +7,16 @@ #include "buckets.h" #include "super.h" -int bch2_backpointer_invalid(const struct bch_fs *, struct bkey_s_c k, +static inline u64 swab40(u64 x) +{ + return (((x & 0x00000000ffULL) << 32)| + ((x & 0x000000ff00ULL) << 16)| + ((x & 0x0000ff0000ULL) >> 0)| + ((x & 0x00ff000000ULL) >> 16)| + ((x & 0xff00000000ULL) >> 32)); +} + +int bch2_backpointer_invalid(struct bch_fs *, struct bkey_s_c k, enum bkey_invalid_flags, struct printbuf *); void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *); void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/bbpos.h b/fs/bcachefs/bbpos.h index 1fbed1f8378d..be2edced5213 100644 --- a/fs/bcachefs/bbpos.h +++ b/fs/bcachefs/bbpos.h @@ -2,20 +2,9 @@ #ifndef _BCACHEFS_BBPOS_H #define _BCACHEFS_BBPOS_H +#include "bbpos_types.h" #include "bkey_methods.h" - -struct bbpos { - enum btree_id btree; - struct bpos pos; -}; - -static inline struct bbpos BBPOS(enum btree_id btree, struct bpos pos) -{ - return (struct bbpos) { btree, pos }; -} - -#define BBPOS_MIN BBPOS(0, POS_MIN) -#define BBPOS_MAX BBPOS(BTREE_ID_NR - 1, POS_MAX) +#include "btree_cache.h" static inline int bbpos_cmp(struct bbpos l, struct bbpos r) { @@ -40,7 +29,7 @@ static inline struct bbpos bbpos_successor(struct bbpos pos) static inline void bch2_bbpos_to_text(struct printbuf *out, struct bbpos pos) { - prt_str(out, bch2_btree_ids[pos.btree]); + prt_str(out, bch2_btree_id_str(pos.btree)); prt_char(out, ':'); bch2_bpos_to_text(out, pos.pos); } diff --git a/fs/bcachefs/bbpos_types.h b/fs/bcachefs/bbpos_types.h new file mode 100644 index 000000000000..5198e94cf3b8 --- /dev/null +++ b/fs/bcachefs/bbpos_types.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BBPOS_TYPES_H +#define _BCACHEFS_BBPOS_TYPES_H + +struct bbpos { + enum btree_id btree; + struct bpos pos; +}; + +static inline struct bbpos BBPOS(enum btree_id btree, struct bpos pos) +{ + return (struct bbpos) { btree, pos }; +} + +#define BBPOS_MIN BBPOS(0, POS_MIN) +#define BBPOS_MAX BBPOS(BTREE_ID_NR - 1, POS_MAX) + +#endif /* _BCACHEFS_BBPOS_TYPES_H */ diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 53ffa88cae16..9cb8684959ee 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -209,6 +209,7 @@ #include "nocow_locking_types.h" #include "opts.h" #include "recovery_types.h" +#include "sb-errors_types.h" #include "seqmutex.h" #include "util.h" @@ -418,6 +419,7 @@ enum bch_time_stats { #include "buckets_types.h" #include "buckets_waiting_for_journal_types.h" #include "clock_types.h" +#include "disk_groups_types.h" #include "ec_types.h" #include "journal_types.h" #include "keylist_types.h" @@ -463,6 +465,7 @@ enum gc_phase { GC_PHASE_BTREE_snapshot_trees, GC_PHASE_BTREE_deleted_inodes, GC_PHASE_BTREE_logged_ops, + GC_PHASE_BTREE_rebalance_work, GC_PHASE_PENDING_DELETE, }; @@ -500,6 +503,8 @@ struct bch_dev { * Committed by bch2_write_super() -> bch_fs_mi_update() */ struct bch_member_cpu mi; + atomic64_t errors[BCH_MEMBER_ERROR_NR]; + __uuid_t uuid; char name[BDEVNAME_SIZE]; @@ -578,7 +583,7 @@ enum { BCH_FS_INITIAL_GC_UNFIXED, /* kill when we enumerate fsck 
errors */ BCH_FS_NEED_ANOTHER_GC, - BCH_FS_HAVE_DELETED_SNAPSHOTS, + BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, /* errors: */ BCH_FS_ERROR, @@ -938,9 +943,6 @@ struct bch_fs { struct list_head moving_context_list; struct mutex moving_context_lock; - struct list_head data_progress_list; - struct mutex data_progress_lock; - /* REBALANCE */ struct bch_fs_rebalance rebalance; @@ -991,11 +993,6 @@ struct bch_fs { struct bio_set dio_read_bioset; struct bio_set nocow_flush_bioset; - /* ERRORS */ - struct list_head fsck_errors; - struct mutex fsck_error_lock; - bool fsck_alloc_err; - /* QUOTAS */ struct bch_memquota_type quotas[QTYP_NR]; @@ -1044,6 +1041,14 @@ struct bch_fs { struct bch2_time_stats times[BCH_TIME_STAT_NR]; struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR]; + + /* ERRORS */ + struct list_head fsck_error_msgs; + struct mutex fsck_error_msgs_lock; + bool fsck_alloc_msgs_err; + + bch_sb_errors_cpu fsck_error_counts; + struct mutex fsck_error_counts_lock; }; extern struct wait_queue_head bch2_read_only_wait; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 99749f3315fe..0a750953ff92 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -613,31 +613,17 @@ struct bch_extent_stripe_ptr { #endif }; -struct bch_extent_reservation { -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u64 type:6, - unused:22, - replicas:4, - generation:32; -#elif defined (__BIG_ENDIAN_BITFIELD) - __u64 generation:32, - replicas:4, - unused:22, - type:6; -#endif -}; - struct bch_extent_rebalance { #if defined(__LITTLE_ENDIAN_BITFIELD) - __u64 type:7, - unused:33, - compression:8, + __u64 type:6, + unused:34, + compression:8, /* enum bch_compression_opt */ target:16; #elif defined (__BIG_ENDIAN_BITFIELD) __u64 target:16, compression:8, - unused:33, - type:7; + unused:34, + type:6; #endif }; @@ -838,34 +824,30 @@ enum inode_opt_id { Inode_opt_nr, }; -enum { - /* - * User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL - * flags) - */ - __BCH_INODE_SYNC = 0, - __BCH_INODE_IMMUTABLE = 1, - __BCH_INODE_APPEND = 2, - __BCH_INODE_NODUMP = 3, - __BCH_INODE_NOATIME = 4, - - __BCH_INODE_I_SIZE_DIRTY = 5, /* obsolete */ - __BCH_INODE_I_SECTORS_DIRTY = 6, /* obsolete */ - __BCH_INODE_UNLINKED = 7, - __BCH_INODE_BACKPTR_UNTRUSTED = 8, - - /* bits 20+ reserved for packed fields below: */ -}; - -#define BCH_INODE_SYNC (1 << __BCH_INODE_SYNC) -#define BCH_INODE_IMMUTABLE (1 << __BCH_INODE_IMMUTABLE) -#define BCH_INODE_APPEND (1 << __BCH_INODE_APPEND) -#define BCH_INODE_NODUMP (1 << __BCH_INODE_NODUMP) -#define BCH_INODE_NOATIME (1 << __BCH_INODE_NOATIME) -#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY) -#define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY) -#define BCH_INODE_UNLINKED (1 << __BCH_INODE_UNLINKED) -#define BCH_INODE_BACKPTR_UNTRUSTED (1 << __BCH_INODE_BACKPTR_UNTRUSTED) +#define BCH_INODE_FLAGS() \ + x(sync, 0) \ + x(immutable, 1) \ + x(append, 2) \ + x(nodump, 3) \ + x(noatime, 4) \ + x(i_size_dirty, 5) \ + x(i_sectors_dirty, 6) \ + x(unlinked, 7) \ + x(backptr_untrusted, 8) + +/* bits 20+ reserved for packed fields below: */ + +enum bch_inode_flags { +#define x(t, n) BCH_INODE_##t = 1U << n, + BCH_INODE_FLAGS() +#undef x +}; + +enum __bch_inode_flags { +#define x(t, n) __BCH_INODE_##t = n, + BCH_INODE_FLAGS() +#undef x +}; LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24); LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 31); @@ -1232,7 +1214,8 @@ struct bch_sb_field { 
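
The BCH_INODE_FLAGS() change above is the x-macro idiom this header already uses for btree ids and superblock field types: one authoritative flag list expands into both a mask enum and a bit-number enum, so the two can never drift out of sync the way the old parallel hand-written lists could. Reduced to its essentials:

	#define MY_FLAGS()	\
		x(sync,   0)	\
		x(append, 2)

	enum my_flags {			/* masks: MY_sync = 0x1, MY_append = 0x4 */
	#define x(t, n) MY_##t = 1U << n,
		MY_FLAGS()
	#undef x
	};

	enum __my_flags {		/* bit numbers: __MY_sync = 0, __MY_append = 2 */
	#define x(t, n) __MY_##t = n,
		MY_FLAGS()
	#undef x
	};
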
x(journal_seq_blacklist, 8) \ x(journal_v2, 9) \ x(counters, 10) \ - x(members_v2, 11) + x(members_v2, 11) \ + x(errors, 12) enum bch_sb_field_type { #define x(f, nr) BCH_SB_FIELD_##f = nr, @@ -1282,6 +1265,18 @@ enum bch_iops_measurement { BCH_IOPS_NR }; +#define BCH_MEMBER_ERROR_TYPES() \ + x(read, 0) \ + x(write, 1) \ + x(checksum, 2) + +enum bch_member_error_type { +#define x(t, n) BCH_MEMBER_ERROR_##t = n, + BCH_MEMBER_ERROR_TYPES() +#undef x + BCH_MEMBER_ERROR_NR +}; + struct bch_member { __uuid_t uuid; __le64 nbuckets; /* device size */ @@ -1292,6 +1287,9 @@ struct bch_member { __le64 flags; __le32 iops[4]; + __le64 errors[BCH_MEMBER_ERROR_NR]; + __le64 errors_at_reset[BCH_MEMBER_ERROR_NR]; + __le64 errors_reset_time; }; #define BCH_MEMBER_V1_BYTES 56 @@ -1615,11 +1613,20 @@ struct journal_seq_blacklist_entry { struct bch_sb_field_journal_seq_blacklist { struct bch_sb_field field; + struct journal_seq_blacklist_entry start[]; +}; - struct journal_seq_blacklist_entry start[0]; - __u64 _data[]; +struct bch_sb_field_errors { + struct bch_sb_field field; + struct bch_sb_field_error_entry { + __le64 v; + __le64 last_error_time; + } entries[]; }; +LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID, struct bch_sb_field_error_entry, v, 0, 16); +LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR, struct bch_sb_field_error_entry, v, 16, 64); + /* Superblock: */ /* @@ -1682,7 +1689,9 @@ struct bch_sb_field_journal_seq_blacklist { x(snapshot_skiplists, BCH_VERSION(1, 1), \ BIT_ULL(BCH_RECOVERY_PASS_check_snapshots)) \ x(deleted_inodes, BCH_VERSION(1, 2), \ - BIT_ULL(BCH_RECOVERY_PASS_check_inodes)) + BIT_ULL(BCH_RECOVERY_PASS_check_inodes)) \ + x(rebalance_work, BCH_VERSION(1, 3), \ + BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, @@ -1693,7 +1702,7 @@ enum bcachefs_metadata_version { }; static const __maybe_unused -unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_major_minor; +unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_rebalance_work; #define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1) @@ -2247,7 +2256,8 @@ LE32_BITMASK(JSET_NO_FLUSH, struct jset, flags, 5, 6); enum btree_id_flags { BTREE_ID_EXTENTS = BIT(0), BTREE_ID_SNAPSHOTS = BIT(1), - BTREE_ID_DATA = BIT(2), + BTREE_ID_SNAPSHOT_FIELD = BIT(2), + BTREE_ID_DATA = BIT(3), }; #define BCH_BTREE_IDS() \ @@ -2302,11 +2312,13 @@ enum btree_id_flags { BIT_ULL(KEY_TYPE_bucket_gens)) \ x(snapshot_trees, 15, 0, \ BIT_ULL(KEY_TYPE_snapshot_tree)) \ - x(deleted_inodes, 16, BTREE_ID_SNAPSHOTS, \ + x(deleted_inodes, 16, BTREE_ID_SNAPSHOT_FIELD, \ BIT_ULL(KEY_TYPE_set)) \ x(logged_ops, 17, 0, \ BIT_ULL(KEY_TYPE_logged_op_truncate)| \ - BIT_ULL(KEY_TYPE_logged_op_finsert)) + BIT_ULL(KEY_TYPE_logged_op_finsert)) \ + x(rebalance_work, 18, BTREE_ID_SNAPSHOT_FIELD, \ + BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie)) enum btree_id { #define x(name, nr, ...) 
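
Two details worth spelling out in the bcachefs_format.h hunks above: the journal_seq_blacklist change is the standard zero-length-array to flexible-array-member conversion, and each new on-disk error entry packs its fields into a single little-endian u64 -- per the LE64_BITMASK() declarations, bits 0-15 hold the error id and bits 16-63 the occurrence count. Unpacked by hand (hypothetical helper, value already converted to host endianness):

	#include <stdint.h>

	struct sb_error_decoded {
		uint16_t id;	/* BCH_SB_ERROR_ENTRY_ID: bits 0..15  */
		uint64_t nr;	/* BCH_SB_ERROR_ENTRY_NR: bits 16..63 */
	};

	static struct sb_error_decoded sb_error_decode(uint64_t v)
	{
		return (struct sb_error_decoded) {
			.id = v & 0xffff,
			.nr = v >> 16,
		};
	}
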
BTREE_ID_##name = nr, diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index 518450209236..831be01809f2 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -92,19 +92,15 @@ enum bkey_lr_packed { #define bkey_lr_packed(_l, _r) \ ((_l)->format + ((_r)->format << 1)) -#define bkey_copy(_dst, _src) \ -do { \ - BUILD_BUG_ON(!type_is(_dst, struct bkey_i *) && \ - !type_is(_dst, struct bkey_packed *)); \ - BUILD_BUG_ON(!type_is(_src, struct bkey_i *) && \ - !type_is(_src, struct bkey_packed *)); \ - EBUG_ON((u64 *) (_dst) > (u64 *) (_src) && \ - (u64 *) (_dst) < (u64 *) (_src) + \ - ((struct bkey *) (_src))->u64s); \ - \ - memcpy_u64s_small((_dst), (_src), \ - ((struct bkey *) (_src))->u64s); \ -} while (0) +static inline void bkey_p_copy(struct bkey_packed *dst, const struct bkey_packed *src) +{ + memcpy_u64s_small(dst, src, src->u64s); +} + +static inline void bkey_copy(struct bkey_i *dst, const struct bkey_i *src) +{ + memcpy_u64s_small(dst, src, src->k.u64s); +} struct btree; diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index d9fb1fc81f1e..761f5e33b1e6 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -3,6 +3,7 @@ #include "bcachefs.h" #include "backpointers.h" #include "bkey_methods.h" +#include "btree_cache.h" #include "btree_types.h" #include "alloc_background.h" #include "dirent.h" @@ -25,7 +26,7 @@ const char * const bch2_bkey_types[] = { NULL }; -static int deleted_key_invalid(const struct bch_fs *c, struct bkey_s_c k, +static int deleted_key_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { return 0; @@ -39,23 +40,24 @@ static int deleted_key_invalid(const struct bch_fs *c, struct bkey_s_c k, .key_invalid = deleted_key_invalid, \ }) -static int empty_val_key_invalid(const struct bch_fs *c, struct bkey_s_c k, +static int empty_val_key_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (bkey_val_bytes(k.k)) { - prt_printf(err, "incorrect value size (%zu != 0)", - bkey_val_bytes(k.k)); - return -BCH_ERR_invalid_bkey; - } - - return 0; + int ret = 0; + + bkey_fsck_err_on(bkey_val_bytes(k.k), c, err, + bkey_val_size_nonzero, + "incorrect value size (%zu != 0)", + bkey_val_bytes(k.k)); +fsck_err: + return ret; } #define bch2_bkey_ops_error ((struct bkey_ops) { \ .key_invalid = empty_val_key_invalid, \ }) -static int key_type_cookie_invalid(const struct bch_fs *c, struct bkey_s_c k, +static int key_type_cookie_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { return 0; @@ -70,7 +72,7 @@ static int key_type_cookie_invalid(const struct bch_fs *c, struct bkey_s_c k, .key_invalid = empty_val_key_invalid, \ }) -static int key_type_inline_data_invalid(const struct bch_fs *c, struct bkey_s_c k, +static int key_type_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { return 0; @@ -91,18 +93,6 @@ static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c, .val_to_text = key_type_inline_data_to_text, \ }) -static int key_type_set_invalid(const struct bch_fs *c, struct bkey_s_c k, - enum bkey_invalid_flags flags, struct printbuf *err) -{ - if (bkey_val_bytes(k.k)) { - prt_printf(err, "incorrect value size (%zu != %zu)", - bkey_val_bytes(k.k), sizeof(struct bch_cookie)); - return -BCH_ERR_invalid_bkey; - } - - return 0; -} - static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) { 
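
The bkey.h hunk above replaces one macro that accepted either key representation with two typed inlines -- bkey_p_copy() for packed keys, bkey_copy() for unpacked -- so picking the wrong variant is now an ordinary compile error rather than a BUILD_BUG_ON() in an expanded macro (the old EBUG_ON() overlap check goes away with it). Both bottom out in memcpy_u64s_small(); a plausible reading of that helper, shown only as a sketch since the real one lives in the bcachefs utility headers:

	/* sketch: copy a small number of 64-bit words */
	static inline void memcpy_u64s_small(void *dst, const void *src,
					     unsigned u64s)
	{
		unsigned long long *d = dst;
		const unsigned long long *s = src;

		while (u64s--)
			*d++ = *s++;
	}
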
bch2_key_resize(l.k, l.k->size + r.k->size); @@ -110,7 +100,7 @@ static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_ } #define bch2_bkey_ops_set ((struct bkey_ops) { \ - .key_invalid = key_type_set_invalid, \ + .key_invalid = empty_val_key_invalid, \ .key_merge = key_type_set_merge, \ }) @@ -128,84 +118,95 @@ int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k, struct printbuf *err) { const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type); + int ret = 0; - if (bkey_val_bytes(k.k) < ops->min_val_size) { - prt_printf(err, "bad val size (%zu < %u)", - bkey_val_bytes(k.k), ops->min_val_size); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_val_bytes(k.k) < ops->min_val_size, c, err, + bkey_val_size_too_small, + "bad val size (%zu < %u)", + bkey_val_bytes(k.k), ops->min_val_size); if (!ops->key_invalid) return 0; - return ops->key_invalid(c, k, flags, err); + ret = ops->key_invalid(c, k, flags, err); +fsck_err: + return ret; } static u64 bch2_key_types_allowed[] = { -#define x(name, nr, flags, keys) [BKEY_TYPE_##name] = BIT_ULL(KEY_TYPE_deleted)|keys, - BCH_BTREE_IDS() -#undef x [BKEY_TYPE_btree] = BIT_ULL(KEY_TYPE_deleted)| BIT_ULL(KEY_TYPE_btree_ptr)| BIT_ULL(KEY_TYPE_btree_ptr_v2), +#define x(name, nr, flags, keys) [BKEY_TYPE_##name] = BIT_ULL(KEY_TYPE_deleted)|keys, + BCH_BTREE_IDS() +#undef x }; +const char *bch2_btree_node_type_str(enum btree_node_type type) +{ + return type == BKEY_TYPE_btree ? "internal btree node" : bch2_btree_id_str(type - 1); +} + int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, enum btree_node_type type, enum bkey_invalid_flags flags, struct printbuf *err) { - if (k.k->u64s < BKEY_U64s) { - prt_printf(err, "u64s too small (%u < %zu)", k.k->u64s, BKEY_U64s); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; - if (flags & BKEY_INVALID_COMMIT && - !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type))) { - prt_printf(err, "invalid key type for btree %s (%s)", - bch2_btree_ids[type], bch2_bkey_types[k.k->type]); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(k.k->u64s < BKEY_U64s, c, err, + bkey_u64s_too_small, + "u64s too small (%u < %zu)", k.k->u64s, BKEY_U64s); - if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) { - if (k.k->size == 0) { - prt_printf(err, "size == 0"); - return -BCH_ERR_invalid_bkey; - } + if (type >= BKEY_TYPE_NR) + return 0; - if (k.k->size > k.k->p.offset) { - prt_printf(err, "size greater than offset (%u > %llu)", - k.k->size, k.k->p.offset); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) && + !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err, + bkey_invalid_type_for_btree, + "invalid key type for btree %s (%s)", + bch2_btree_node_type_str(type), bch2_bkey_types[k.k->type]); + + if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) { + bkey_fsck_err_on(k.k->size == 0, c, err, + bkey_extent_size_zero, + "size == 0"); + + bkey_fsck_err_on(k.k->size > k.k->p.offset, c, err, + bkey_extent_size_greater_than_offset, + "size greater than offset (%u > %llu)", + k.k->size, k.k->p.offset); } else { - if (k.k->size) { - prt_printf(err, "size != 0"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(k.k->size, c, err, + bkey_size_nonzero, + "size != 0"); } if (type != BKEY_TYPE_btree) { - if (!btree_type_has_snapshots((enum btree_id) type) && - k.k->p.snapshot) { - prt_printf(err, "nonzero snapshot"); - return -BCH_ERR_invalid_bkey; - } - - if (btree_type_has_snapshots((enum btree_id) type) && - !k.k->p.snapshot) { 
- prt_printf(err, "snapshot == 0"); - return -BCH_ERR_invalid_bkey; + enum btree_id btree = type - 1; + + if (btree_type_has_snapshots(btree)) { + bkey_fsck_err_on(!k.k->p.snapshot, c, err, + bkey_snapshot_zero, + "snapshot == 0"); + } else if (!btree_type_has_snapshot_field(btree)) { + bkey_fsck_err_on(k.k->p.snapshot, c, err, + bkey_snapshot_nonzero, + "nonzero snapshot"); + } else { + /* + * btree uses snapshot field but it's not required to be + * nonzero + */ } - if (bkey_eq(k.k->p, POS_MAX)) { - prt_printf(err, "key at POS_MAX"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_eq(k.k->p, POS_MAX), c, err, + bkey_at_pos_max, + "key at POS_MAX"); } - - return 0; +fsck_err: + return ret; } int bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, @@ -217,20 +218,20 @@ int bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, bch2_bkey_val_invalid(c, k, flags, err); } -int bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k, - struct printbuf *err) +int bch2_bkey_in_btree_node(struct bch_fs *c, struct btree *b, + struct bkey_s_c k, struct printbuf *err) { - if (bpos_lt(k.k->p, b->data->min_key)) { - prt_printf(err, "key before start of btree node"); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; - if (bpos_gt(k.k->p, b->data->max_key)) { - prt_printf(err, "key past end of btree node"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bpos_lt(k.k->p, b->data->min_key), c, err, + bkey_before_start_of_btree_node, + "key before start of btree node"); - return 0; + bkey_fsck_err_on(bpos_gt(k.k->p, b->data->max_key), c, err, + bkey_after_end_of_btree_node, + "key past end of btree node"); +fsck_err: + return ret; } void bch2_bpos_to_text(struct printbuf *out, struct bpos pos) diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index 668f595e2fcf..3a370b7087ac 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -21,7 +21,7 @@ extern const struct bkey_ops bch2_bkey_null_ops; * being read or written; more aggressive checks can be enabled when rw == WRITE. 
*/ struct bkey_ops { - int (*key_invalid)(const struct bch_fs *c, struct bkey_s_c k, + int (*key_invalid)(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err); void (*val_to_text)(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -55,7 +55,8 @@ int __bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type, enum bkey_invalid_flags, struct printbuf *); int bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type, enum bkey_invalid_flags, struct printbuf *); -int bch2_bkey_in_btree_node(struct btree *, struct bkey_s_c, struct printbuf *); +int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *, + struct bkey_s_c, struct printbuf *); void bch2_bpos_to_text(struct printbuf *, struct bpos); void bch2_bkey_to_text(struct printbuf *, const struct bkey *); @@ -119,16 +120,6 @@ enum btree_update_flags { #define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE) #define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC) -#define BTREE_TRIGGER_WANTS_OLD_AND_NEW \ - ((1U << KEY_TYPE_alloc)| \ - (1U << KEY_TYPE_alloc_v2)| \ - (1U << KEY_TYPE_alloc_v3)| \ - (1U << KEY_TYPE_alloc_v4)| \ - (1U << KEY_TYPE_stripe)| \ - (1U << KEY_TYPE_inode)| \ - (1U << KEY_TYPE_inode_v2)| \ - (1U << KEY_TYPE_snapshot)) - static inline int bch2_trans_mark_key(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c old, struct bkey_i *new, diff --git a/fs/bcachefs/bkey_sort.c b/fs/bcachefs/bkey_sort.c index b9aa027c881b..bcca9e76a0b4 100644 --- a/fs/bcachefs/bkey_sort.c +++ b/fs/bcachefs/bkey_sort.c @@ -106,7 +106,7 @@ bch2_key_sort_fix_overlapping(struct bch_fs *c, struct bset *dst, while ((k = sort_iter_peek(iter))) { if (!bkey_deleted(k) && !should_drop_next_key(iter)) { - bkey_copy(out, k); + bkey_p_copy(out, k); btree_keys_account_key_add(&nr, 0, out); out = bkey_p_next(out); } @@ -137,7 +137,7 @@ bch2_sort_repack(struct bset *dst, struct btree *src, continue; if (!transform) - bkey_copy(out, in); + bkey_p_copy(out, in); else if (bch2_bkey_transform(out_f, out, bkey_packed(in) ? 
in_f : &bch2_bkey_format_current, in)) out->format = KEY_FORMAT_LOCAL_BTREE; @@ -191,7 +191,7 @@ unsigned bch2_sort_keys(struct bkey_packed *dst, memcpy_u64s_small(out, in, bkeyp_key_u64s(f, in)); set_bkeyp_val_u64s(f, out, 0); } else { - bkey_copy(out, in); + bkey_p_copy(out, in); } out->needs_whiteout |= needs_whiteout; out = bkey_p_next(out); diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 5e5858191905..47e7770d0583 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -472,7 +472,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c) mutex_init(&c->verify_lock); - shrink = shrinker_alloc(0, "%s/btree_cache", c->name); + shrink = shrinker_alloc(0, "%s-btree_cache", c->name); if (!shrink) goto err; bc->shrink = shrink; @@ -785,12 +785,12 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) "btree node header doesn't match ptr\n" "btree %s level %u\n" "ptr: ", - bch2_btree_ids[b->c.btree_id], b->c.level); + bch2_btree_id_str(b->c.btree_id), b->c.level); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); prt_printf(&buf, "\nheader: btree %s level %llu\n" "min ", - bch2_btree_ids[BTREE_NODE_ID(b->data)], + bch2_btree_id_str(BTREE_NODE_ID(b->data)), BTREE_NODE_LEVEL(b->data)); bch2_bpos_to_text(&buf, b->data->min_key); @@ -1153,8 +1153,21 @@ wait_on_io: six_unlock_intent(&b->c.lock); } -void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, - const struct btree *b) +const char *bch2_btree_id_str(enum btree_id btree) +{ + return btree < BTREE_ID_NR ? __bch2_btree_ids[btree] : "(unknown)"; +} + +void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b) +{ + prt_printf(out, "%s level %u/%u\n ", + bch2_btree_id_str(b->c.btree_id), + b->c.level, + bch2_btree_id_root(c, b->c.btree_id)->level); + bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); +} + +void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b) { struct bset_stats stats; diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index 1e562b6efa62..cfb80b201d61 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -123,8 +123,9 @@ static inline struct btree *btree_node_root(struct bch_fs *c, struct btree *b) return bch2_btree_id_root(c, b->c.btree_id)->b; } -void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, - const struct btree *); +const char *bch2_btree_id_str(enum btree_id); +void bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, const struct btree *); +void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, const struct btree *); void bch2_btree_cache_to_text(struct printbuf *, const struct bch_fs *); #endif /* _BCACHEFS_BTREE_CACHE_H */ diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 693ed067b1a7..0b5d09c8475d 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -95,15 +95,15 @@ static int bch2_gc_check_topology(struct bch_fs *c, bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(cur.k)); if (__fsck_err(c, - FSCK_CAN_FIX| - FSCK_CAN_IGNORE| - FSCK_NO_RATELIMIT, - "btree node with incorrect min_key at btree %s level %u:\n" - " prev %s\n" - " cur %s", - bch2_btree_ids[b->c.btree_id], b->c.level, - buf1.buf, buf2.buf) && - should_restart_for_topology_repair(c)) { + FSCK_CAN_FIX| + FSCK_CAN_IGNORE| + FSCK_NO_RATELIMIT, + btree_node_topology_bad_min_key, + "btree node with incorrect min_key at btree %s level %u:\n" + " prev %s\n" + " cur %s", + bch2_btree_id_str(b->c.btree_id), b->c.level, + 
buf1.buf, buf2.buf) && should_restart_for_topology_repair(c)) { bch_info(c, "Halting mark and sweep to start topology repair pass"); ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); goto err; @@ -122,14 +122,12 @@ static int bch2_gc_check_topology(struct bch_fs *c, bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(cur.k)); bch2_bpos_to_text(&buf2, node_end); - if (__fsck_err(c, - FSCK_CAN_FIX| - FSCK_CAN_IGNORE| - FSCK_NO_RATELIMIT, + if (__fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE|FSCK_NO_RATELIMIT, + btree_node_topology_bad_max_key, "btree node with incorrect max_key at btree %s level %u:\n" " %s\n" " expected %s", - bch2_btree_ids[b->c.btree_id], b->c.level, + bch2_btree_id_str(b->c.btree_id), b->c.level, buf1.buf, buf2.buf) && should_restart_for_topology_repair(c)) { bch_info(c, "Halting mark and sweep to start topology repair pass"); @@ -287,10 +285,11 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, if (mustfix_fsck_err_on(bpos_ge(prev->data->min_key, cur->data->min_key), c, + btree_node_topology_overwritten_by_next_node, "btree node overwritten by next node at btree %s level %u:\n" " node %s\n" " next %s", - bch2_btree_ids[b->c.btree_id], b->c.level, + bch2_btree_id_str(b->c.btree_id), b->c.level, buf1.buf, buf2.buf)) { ret = DROP_PREV_NODE; goto out; @@ -298,10 +297,11 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, if (mustfix_fsck_err_on(!bpos_eq(prev->key.k.p, bpos_predecessor(cur->data->min_key)), c, + btree_node_topology_bad_max_key, "btree node with incorrect max_key at btree %s level %u:\n" " node %s\n" " next %s", - bch2_btree_ids[b->c.btree_id], b->c.level, + bch2_btree_id_str(b->c.btree_id), b->c.level, buf1.buf, buf2.buf)) ret = set_node_max(c, prev, bpos_predecessor(cur->data->min_key)); @@ -310,20 +310,22 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, if (mustfix_fsck_err_on(bpos_ge(expected_start, cur->data->max_key), c, + btree_node_topology_overwritten_by_prev_node, "btree node overwritten by prev node at btree %s level %u:\n" " prev %s\n" " node %s", - bch2_btree_ids[b->c.btree_id], b->c.level, + bch2_btree_id_str(b->c.btree_id), b->c.level, buf1.buf, buf2.buf)) { ret = DROP_THIS_NODE; goto out; } if (mustfix_fsck_err_on(!bpos_eq(expected_start, cur->data->min_key), c, + btree_node_topology_bad_min_key, "btree node with incorrect min_key at btree %s level %u:\n" " prev %s\n" " node %s", - bch2_btree_ids[b->c.btree_id], b->c.level, + bch2_btree_id_str(b->c.btree_id), b->c.level, buf1.buf, buf2.buf)) ret = set_node_min(c, cur, expected_start); } @@ -344,10 +346,11 @@ static int btree_repair_node_end(struct bch_fs *c, struct btree *b, bch2_bpos_to_text(&buf2, b->key.k.p); if (mustfix_fsck_err_on(!bpos_eq(child->key.k.p, b->key.k.p), c, + btree_node_topology_bad_max_key, "btree node with incorrect max_key at btree %s level %u:\n" " %s\n" " expected %s", - bch2_btree_ids[b->c.btree_id], b->c.level, + bch2_btree_id_str(b->c.btree_id), b->c.level, buf1.buf, buf2.buf)) { ret = set_node_max(c, child, b->key.k.p); if (ret) @@ -396,9 +399,10 @@ again: bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k)); if (mustfix_fsck_err_on(ret == -EIO, c, + btree_node_unreadable, "Topology repair: unreadable btree node at btree %s level %u:\n" " %s", - bch2_btree_ids[b->c.btree_id], + bch2_btree_id_str(b->c.btree_id), b->c.level - 1, buf.buf)) { bch2_btree_node_evict(trans, cur_k.k); @@ -504,9 +508,10 @@ again: bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); if 
(mustfix_fsck_err_on(!have_child, c, + btree_node_topology_interior_node_empty, "empty interior btree node at btree %s level %u\n" " %s", - bch2_btree_ids[b->c.btree_id], + bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf)) ret = DROP_THIS_NODE; err: @@ -582,7 +587,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id if (!g->gen_valid && (c->opts.reconstruct_alloc || - fsck_err(c, "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n" + fsck_err(c, ptr_to_missing_alloc_key, + "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n" "while marking %s", p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), bch2_data_types[ptr_data_type(k->k, &p.ptr)], @@ -599,7 +605,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id if (gen_cmp(p.ptr.gen, g->gen) > 0 && (c->opts.reconstruct_alloc || - fsck_err(c, "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n" + fsck_err(c, ptr_gen_newer_than_bucket_gen, + "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n" "while marking %s", p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), bch2_data_types[ptr_data_type(k->k, &p.ptr)], @@ -620,7 +627,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id if (gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX && (c->opts.reconstruct_alloc || - fsck_err(c, "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" + fsck_err(c, ptr_gen_newer_than_bucket_gen, + "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" "while marking %s", p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen, bch2_data_types[ptr_data_type(k->k, &p.ptr)], @@ -631,7 +639,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id if (!p.ptr.cached && gen_cmp(p.ptr.gen, g->gen) < 0 && (c->opts.reconstruct_alloc || - fsck_err(c, "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n" + fsck_err(c, stale_dirty_ptr, + "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n" "while marking %s", p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), bch2_data_types[ptr_data_type(k->k, &p.ptr)], @@ -645,6 +654,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id if (fsck_err_on(bucket_data_type(g->data_type) && bucket_data_type(g->data_type) != data_type, c, + ptr_bucket_data_type_mismatch, "bucket %u:%zu different types of data in same bucket: %s, %s\n" "while marking %s", p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), @@ -664,6 +674,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id struct gc_stripe *m = genradix_ptr(&c->gc_stripes, p.ec.idx); if (fsck_err_on(!m || !m->alive, c, + ptr_to_missing_stripe, "pointer to nonexistent stripe %llu\n" "while marking %s", (u64) p.ec.idx, @@ -672,6 +683,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id do_update = true; if (fsck_err_on(m && m->alive && !bch2_ptr_matches_stripe_m(m, p), c, + ptr_to_incorrect_stripe, "pointer does not match stripe %llu\n" "while marking %s", (u64) p.ec.idx, @@ -811,6 +823,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, goto err; if (fsck_err_on(k->k->version.lo > atomic64_read(&c->key_version), c, + bkey_version_in_future, "key version number higher than recorded: %llu > %llu", k->k->version.lo, atomic64_read(&c->key_version))) @@ -968,9 +981,10 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b FSCK_CAN_FIX| FSCK_CAN_IGNORE| FSCK_NO_RATELIMIT, + btree_node_read_error, "Unreadable btree node at btree %s 
level %u:\n" " %s", - bch2_btree_ids[b->c.btree_id], + bch2_btree_id_str(b->c.btree_id), b->c.level - 1, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur.k)), buf.buf)) && @@ -1025,6 +1039,7 @@ static int bch2_gc_btree_init(struct btree_trans *trans, printbuf_reset(&buf); bch2_bpos_to_text(&buf, b->data->min_key); if (mustfix_fsck_err_on(!bpos_eq(b->data->min_key, POS_MIN), c, + btree_root_bad_min_key, "btree root with incorrect min_key: %s", buf.buf)) { bch_err(c, "repair unimplemented"); ret = -BCH_ERR_fsck_repair_unimplemented; @@ -1034,6 +1049,7 @@ static int bch2_gc_btree_init(struct btree_trans *trans, printbuf_reset(&buf); bch2_bpos_to_text(&buf, b->data->max_key); if (mustfix_fsck_err_on(!bpos_eq(b->data->max_key, SPOS_MAX), c, + btree_root_bad_max_key, "btree root with incorrect max_key: %s", buf.buf)) { bch_err(c, "repair unimplemented"); ret = -BCH_ERR_fsck_repair_unimplemented; @@ -1207,16 +1223,16 @@ static int bch2_gc_done(struct bch_fs *c, percpu_down_write(&c->mark_lock); -#define copy_field(_f, _msg, ...) \ +#define copy_field(_err, _f, _msg, ...) \ if (dst->_f != src->_f && \ (!verify || \ - fsck_err(c, _msg ": got %llu, should be %llu" \ + fsck_err(c, _err, _msg ": got %llu, should be %llu" \ , ##__VA_ARGS__, dst->_f, src->_f))) \ dst->_f = src->_f -#define copy_dev_field(_f, _msg, ...) \ - copy_field(_f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__) -#define copy_fs_field(_f, _msg, ...) \ - copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__) +#define copy_dev_field(_err, _f, _msg, ...) \ + copy_field(_err, _f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__) +#define copy_fs_field(_err, _f, _msg, ...) \ + copy_field(_err, _f, "fs has wrong " _msg, ##__VA_ARGS__) for (i = 0; i < ARRAY_SIZE(c->usage); i++) bch2_fs_usage_acc_to_base(c, i); @@ -1227,13 +1243,17 @@ static int bch2_gc_done(struct bch_fs *c, bch2_acc_percpu_u64s((u64 __percpu *) ca->usage_gc, dev_usage_u64s()); - copy_dev_field(buckets_ec, "buckets_ec"); - for (i = 0; i < BCH_DATA_NR; i++) { - copy_dev_field(d[i].buckets, "%s buckets", bch2_data_types[i]); - copy_dev_field(d[i].sectors, "%s sectors", bch2_data_types[i]); - copy_dev_field(d[i].fragmented, "%s fragmented", bch2_data_types[i]); + copy_dev_field(dev_usage_buckets_wrong, + d[i].buckets, "%s buckets", bch2_data_types[i]); + copy_dev_field(dev_usage_sectors_wrong, + d[i].sectors, "%s sectors", bch2_data_types[i]); + copy_dev_field(dev_usage_fragmented_wrong, + d[i].fragmented, "%s fragmented", bch2_data_types[i]); } + + copy_dev_field(dev_usage_buckets_ec_wrong, + buckets_ec, "buckets_ec"); } { @@ -1242,17 +1262,24 @@ static int bch2_gc_done(struct bch_fs *c, struct bch_fs_usage *src = (void *) bch2_acc_percpu_u64s((u64 __percpu *) c->usage_gc, nr); - copy_fs_field(hidden, "hidden"); - copy_fs_field(btree, "btree"); + copy_fs_field(fs_usage_hidden_wrong, + hidden, "hidden"); + copy_fs_field(fs_usage_btree_wrong, + btree, "btree"); if (!metadata_only) { - copy_fs_field(data, "data"); - copy_fs_field(cached, "cached"); - copy_fs_field(reserved, "reserved"); - copy_fs_field(nr_inodes,"nr_inodes"); + copy_fs_field(fs_usage_data_wrong, + data, "data"); + copy_fs_field(fs_usage_cached_wrong, + cached, "cached"); + copy_fs_field(fs_usage_reserved_wrong, + reserved, "reserved"); + copy_fs_field(fs_usage_nr_inodes_wrong, + nr_inodes,"nr_inodes"); for (i = 0; i < BCH_REPLICAS_MAX; i++) - copy_fs_field(persistent_reserved[i], + copy_fs_field(fs_usage_persistent_reserved_wrong, + persistent_reserved[i], "persistent_reserved[%i]", i); } @@ 
-1268,7 +1295,8 @@ static int bch2_gc_done(struct bch_fs *c, printbuf_reset(&buf); bch2_replicas_entry_to_text(&buf, e); - copy_fs_field(replicas[i], "%s", buf.buf); + copy_fs_field(fs_usage_replicas_wrong, + replicas[i], "%s", buf.buf); } } @@ -1404,6 +1432,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans, if (c->opts.reconstruct_alloc || fsck_err_on(new.data_type != gc.data_type, c, + alloc_key_data_type_wrong, "bucket %llu:%llu gen %u has wrong data_type" ": got %s, should be %s", iter->pos.inode, iter->pos.offset, @@ -1412,9 +1441,9 @@ static int bch2_alloc_write_key(struct btree_trans *trans, bch2_data_types[gc.data_type])) new.data_type = gc.data_type; -#define copy_bucket_field(_f) \ +#define copy_bucket_field(_errtype, _f) \ if (c->opts.reconstruct_alloc || \ - fsck_err_on(new._f != gc._f, c, \ + fsck_err_on(new._f != gc._f, c, _errtype, \ "bucket %llu:%llu gen %u data type %s has wrong " #_f \ ": got %u, should be %u", \ iter->pos.inode, iter->pos.offset, \ @@ -1423,11 +1452,16 @@ static int bch2_alloc_write_key(struct btree_trans *trans, new._f, gc._f)) \ new._f = gc._f; \ - copy_bucket_field(gen); - copy_bucket_field(dirty_sectors); - copy_bucket_field(cached_sectors); - copy_bucket_field(stripe_redundancy); - copy_bucket_field(stripe); + copy_bucket_field(alloc_key_gen_wrong, + gen); + copy_bucket_field(alloc_key_dirty_sectors_wrong, + dirty_sectors); + copy_bucket_field(alloc_key_cached_sectors_wrong, + cached_sectors); + copy_bucket_field(alloc_key_stripe_wrong, + stripe); + copy_bucket_field(alloc_key_stripe_redundancy_wrong, + stripe_redundancy); #undef copy_bucket_field if (!bch2_alloc_v4_cmp(*old, new)) @@ -1584,6 +1618,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, } if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c, + reflink_v_refcount_wrong, "reflink key has wrong refcount:\n" " %s\n" " should be %u", @@ -1709,7 +1744,8 @@ static int bch2_gc_write_stripes_key(struct btree_trans *trans, if (bad) bch2_bkey_val_to_text(&buf, c, k); - if (fsck_err_on(bad, c, "%s", buf.buf)) { + if (fsck_err_on(bad, c, stripe_sector_count_wrong, + "%s", buf.buf)) { struct bkey_i_stripe *new; new = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); @@ -1954,19 +1990,17 @@ int bch2_gc_gens(struct bch_fs *c) trans = bch2_trans_get(c); for_each_member_device(ca, c, i) { - struct bucket_gens *gens; + struct bucket_gens *gens = bucket_gens(ca); BUG_ON(ca->oldest_gen); - ca->oldest_gen = kvmalloc(ca->mi.nbuckets, GFP_KERNEL); + ca->oldest_gen = kvmalloc(gens->nbuckets, GFP_KERNEL); if (!ca->oldest_gen) { percpu_ref_put(&ca->ref); ret = -BCH_ERR_ENOMEM_gc_gens; goto err; } - gens = bucket_gens(ca); - for (b = gens->first_bucket; b < gens->nbuckets; b++) ca->oldest_gen[b] = gens->b[b]; diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index a869cf6ac7c6..37d896edb06e 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -184,7 +184,7 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b) k = new_whiteouts; while (ptrs != ptrs_end) { - bkey_copy(k, *ptrs); + bkey_p_copy(k, *ptrs); k = bkey_p_next(k); ptrs++; } @@ -260,7 +260,7 @@ static bool bch2_drop_whiteouts(struct btree *b, enum compact_mode mode) n = bkey_p_next(k); if (!bkey_deleted(k)) { - bkey_copy(out, k); + bkey_p_copy(out, k); out = bkey_p_next(out); } else { BUG_ON(k->needs_whiteout); @@ -510,16 +510,6 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b) bch2_trans_node_reinit_iter(trans, b); } -static void btree_pos_to_text(struct printbuf 
*out, struct bch_fs *c, - struct btree *b) -{ - prt_printf(out, "%s level %u/%u\n ", - bch2_btree_ids[b->c.btree_id], - b->c.level, - bch2_btree_id_root(c, b->c.btree_id)->level); - bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); -} - static void btree_err_msg(struct printbuf *out, struct bch_fs *c, struct bch_dev *ca, struct btree *b, struct bset *i, @@ -532,7 +522,7 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c, if (ca) prt_printf(out, "on %s ", ca->name); prt_printf(out, "at btree "); - btree_pos_to_text(out, c, b); + bch2_btree_pos_to_text(out, c, b); prt_printf(out, "\n node offset %u", b->written); if (i) @@ -540,7 +530,7 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c, prt_str(out, ": "); } -__printf(8, 9) +__printf(9, 10) static int __btree_err(int ret, struct bch_fs *c, struct bch_dev *ca, @@ -548,6 +538,7 @@ static int __btree_err(int ret, struct bset *i, int write, bool have_retry, + enum bch_sb_error_id err_type, const char *fmt, ...) { struct printbuf out = PRINTBUF; @@ -572,9 +563,15 @@ static int __btree_err(int ret, if (!have_retry && ret == -BCH_ERR_btree_node_read_err_must_retry) ret = -BCH_ERR_btree_node_read_err_bad_node; + if (ret != -BCH_ERR_btree_node_read_err_fixable) + bch2_sb_error_count(c, err_type); + switch (ret) { case -BCH_ERR_btree_node_read_err_fixable: - mustfix_fsck_err(c, "%s", out.buf); + ret = bch2_fsck_err(c, FSCK_CAN_FIX, err_type, "%s", out.buf); + if (ret != -BCH_ERR_fsck_fix && + ret != -BCH_ERR_fsck_ignore) + goto fsck_err; ret = -BCH_ERR_fsck_fix; break; case -BCH_ERR_btree_node_read_err_want_retry: @@ -599,9 +596,11 @@ fsck_err: return ret; } -#define btree_err(type, c, ca, b, i, msg, ...) \ +#define btree_err(type, c, ca, b, i, _err_type, msg, ...) \ ({ \ - int _ret = __btree_err(type, c, ca, b, i, write, have_retry, msg, ##__VA_ARGS__);\ + int _ret = __btree_err(type, c, ca, b, i, write, have_retry, \ + BCH_FSCK_ERR_##_err_type, \ + msg, ##__VA_ARGS__); \ \ if (_ret != -BCH_ERR_fsck_fix) { \ ret = _ret; \ @@ -676,13 +675,17 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, int ret = 0; btree_err_on(!bch2_version_compatible(version), - -BCH_ERR_btree_node_read_err_incompatible, c, ca, b, i, + -BCH_ERR_btree_node_read_err_incompatible, + c, ca, b, i, + btree_node_unsupported_version, "unsupported bset version %u.%u", BCH_VERSION_MAJOR(version), BCH_VERSION_MINOR(version)); if (btree_err_on(version < c->sb.version_min, - -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, i, + btree_node_bset_older_than_sb_min, "bset version %u older than superblock version_min %u", version, c->sb.version_min)) { mutex_lock(&c->sb_lock); @@ -693,7 +696,9 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, if (btree_err_on(BCH_VERSION_MAJOR(version) > BCH_VERSION_MAJOR(c->sb.version), - -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, i, + btree_node_bset_newer_than_sb, "bset version %u newer than superblock version %u", version, c->sb.version)) { mutex_lock(&c->sb_lock); @@ -703,11 +708,15 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, } btree_err_on(BSET_SEPARATE_WHITEOUTS(i), - -BCH_ERR_btree_node_read_err_incompatible, c, ca, b, i, + -BCH_ERR_btree_node_read_err_incompatible, + c, ca, b, i, + btree_node_unsupported_version, "BSET_SEPARATE_WHITEOUTS no longer supported"); if (btree_err_on(offset + sectors > btree_sectors(c), - 
-BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, ca, b, i, + bset_past_end_of_btree_node, "bset past end of btree node")) { i->u64s = 0; ret = 0; @@ -715,12 +724,15 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, } btree_err_on(offset && !i->u64s, - -BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, ca, b, i, + bset_empty, "empty bset"); - btree_err_on(BSET_OFFSET(i) && - BSET_OFFSET(i) != offset, - -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, + btree_err_on(BSET_OFFSET(i) && BSET_OFFSET(i) != offset, + -BCH_ERR_btree_node_read_err_want_retry, + c, ca, b, i, + bset_wrong_sector_offset, "bset at wrong sector offset"); if (!offset) { @@ -734,16 +746,22 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, /* XXX endianness */ btree_err_on(bp->seq != bn->keys.seq, - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, NULL, + bset_bad_seq, "incorrect sequence number (wrong btree node)"); } btree_err_on(BTREE_NODE_ID(bn) != b->c.btree_id, - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, i, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, i, + btree_node_bad_btree, "incorrect btree id"); btree_err_on(BTREE_NODE_LEVEL(bn) != b->c.level, - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, i, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, i, + btree_node_bad_level, "incorrect level"); if (!write) @@ -760,7 +778,9 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, } btree_err_on(!bpos_eq(b->data->min_key, bp->min_key), - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, NULL, + btree_node_bad_min_key, "incorrect min_key: got %s should be %s", (printbuf_reset(&buf1), bch2_bpos_to_text(&buf1, bn->min_key), buf1.buf), @@ -769,7 +789,9 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, } btree_err_on(!bpos_eq(bn->max_key, b->key.k.p), - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, i, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, i, + btree_node_bad_max_key, "incorrect max key %s", (printbuf_reset(&buf1), bch2_bpos_to_text(&buf1, bn->max_key), buf1.buf)); @@ -779,7 +801,9 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, BSET_BIG_ENDIAN(i), write, bn); btree_err_on(bch2_bkey_format_invalid(c, &bn->format, write, &buf1), - -BCH_ERR_btree_node_read_err_bad_node, c, ca, b, i, + -BCH_ERR_btree_node_read_err_bad_node, + c, ca, b, i, + btree_node_bad_format, "invalid bkey format: %s\n %s", buf1.buf, (printbuf_reset(&buf2), bch2_bkey_format_to_text(&buf2, &bn->format), buf2.buf)); @@ -802,7 +826,7 @@ static int bset_key_invalid(struct bch_fs *c, struct btree *b, struct printbuf *err) { return __bch2_bkey_invalid(c, k, btree_node_type(b), READ, err) ?: - (!updated_range ? bch2_bkey_in_btree_node(b, k, err) : 0) ?: + (!updated_range ? bch2_bkey_in_btree_node(c, b, k, err) : 0) ?: (rw == WRITE ? 
bch2_bkey_val_invalid(c, k, READ, err) : 0); } @@ -823,14 +847,18 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, struct bkey tmp; if (btree_err_on(bkey_p_next(k) > vstruct_last(i), - -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, i, + btree_node_bkey_past_bset_end, "key extends past end of bset")) { i->u64s = cpu_to_le16((u64 *) k - i->_data); break; } if (btree_err_on(k->format > KEY_FORMAT_CURRENT, - -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, i, + btree_node_bkey_bad_format, "invalid bkey format %u", k->format)) { i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); memmove_u64s_down(k, bkey_p_next(k), @@ -849,12 +877,14 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, printbuf_reset(&buf); if (bset_key_invalid(c, b, u.s_c, updated_range, write, &buf)) { printbuf_reset(&buf); - prt_printf(&buf, "invalid bkey: "); bset_key_invalid(c, b, u.s_c, updated_range, write, &buf); prt_printf(&buf, "\n "); bch2_bkey_val_to_text(&buf, c, u.s_c); - btree_err(-BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, "%s", buf.buf); + btree_err(-BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, i, + btree_node_bad_bkey, + "invalid bkey: %s", buf.buf); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); memmove_u64s_down(k, bkey_p_next(k), @@ -878,7 +908,10 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, bch2_dump_bset(c, b, i, 0); - if (btree_err(-BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, "%s", buf.buf)) { + if (btree_err(-BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, i, + btree_node_bkey_out_of_order, + "%s", buf.buf)) { i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k); @@ -919,47 +952,62 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, sort_iter_init(iter, b, (btree_blocks(c) + 1) * 2); if (bch2_meta_read_fault("btree")) - btree_err(-BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, + btree_err(-BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, NULL, + btree_node_fault_injected, "dynamic fault"); btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c), - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, NULL, + btree_node_bad_magic, "bad magic: want %llx, got %llx", bset_magic(c), le64_to_cpu(b->data->magic)); - btree_err_on(!b->data->keys.seq, - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, - "bad btree header: seq 0"); - if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { struct bch_btree_ptr_v2 *bp = &bkey_i_to_btree_ptr_v2(&b->key)->v; btree_err_on(b->data->keys.seq != bp->seq, - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, NULL, + btree_node_bad_seq, "got wrong btree node (seq %llx want %llx)", b->data->keys.seq, bp->seq); + } else { + btree_err_on(!b->data->keys.seq, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, NULL, + btree_node_bad_seq, + "bad btree header: seq 0"); } while (b->written < (ptr_written ?: btree_sectors(c))) { unsigned sectors; struct nonce nonce; - struct bch_csum csum; bool first = !b->written; + bool csum_bad; if (!b->written) { i = &b->data->keys; btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), - -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, - "unknown checksum type %llu", - BSET_CSUM_TYPE(i)); + 
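
/*
 * Reading the checksum hunks in btree_io.c: a mismatch now does two
 * things in order -- bch2_io_error(ca, BCH_MEMBER_ERROR_checksum)
 * bumps the new per-device counter (surfacing flaky hardware in the
 * superblock even when recovery succeeds), and only then does
 * btree_err_on() fire with -BCH_ERR_btree_node_read_err_want_retry,
 * so the node is re-read from another replica before anything is
 * treated as fatal.
 */
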
-BCH_ERR_btree_node_read_err_want_retry, + c, ca, b, i, + bset_unknown_csum, + "unknown checksum type %llu", BSET_CSUM_TYPE(i)); nonce = btree_nonce(i, b->written << 9); - csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data); - btree_err_on(bch2_crc_cmp(csum, b->data->csum), - -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, + csum_bad = bch2_crc_cmp(b->data->csum, + csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data)); + if (csum_bad) + bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); + + btree_err_on(csum_bad, + -BCH_ERR_btree_node_read_err_want_retry, + c, ca, b, i, + bset_bad_csum, "invalid checksum"); ret = bset_encrypt(c, i, b->written << 9); @@ -969,7 +1017,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_err_on(btree_node_type_is_extents(btree_node_type(b)) && !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data), - -BCH_ERR_btree_node_read_err_incompatible, c, NULL, b, NULL, + -BCH_ERR_btree_node_read_err_incompatible, + c, NULL, b, NULL, + btree_node_unsupported_version, "btree node does not have NEW_EXTENT_OVERWRITE set"); sectors = vstruct_sectors(b->data, c->block_bits); @@ -981,15 +1031,21 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, break; btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), - -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, - "unknown checksum type %llu", - BSET_CSUM_TYPE(i)); + -BCH_ERR_btree_node_read_err_want_retry, + c, ca, b, i, + bset_unknown_csum, + "unknown checksum type %llu", BSET_CSUM_TYPE(i)); nonce = btree_nonce(i, b->written << 9); - csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); - - btree_err_on(bch2_crc_cmp(csum, bne->csum), - -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, + csum_bad = bch2_crc_cmp(bne->csum, + csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne)); + if (csum_bad) + bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); + + btree_err_on(csum_bad, + -BCH_ERR_btree_node_read_err_want_retry, + c, ca, b, i, + bset_bad_csum, "invalid checksum"); ret = bset_encrypt(c, i, b->written << 9); @@ -1022,12 +1078,16 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, true); btree_err_on(blacklisted && first, - -BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, ca, b, i, + bset_blacklisted_journal_seq, "first btree node bset has blacklisted journal seq (%llu)", le64_to_cpu(i->journal_seq)); btree_err_on(blacklisted && ptr_written, - -BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, ca, b, i, + first_bset_blacklisted_journal_seq, "found blacklisted bset (journal seq %llu) in btree node at offset %u-%u/%u", le64_to_cpu(i->journal_seq), b->written, b->written + sectors, ptr_written); @@ -1044,7 +1104,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, if (ptr_written) { btree_err_on(b->written < ptr_written, - -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, NULL, + -BCH_ERR_btree_node_read_err_want_retry, + c, ca, b, NULL, + btree_node_data_missing, "btree node data missing: expected %u sectors, found %u", ptr_written, b->written); } else { @@ -1055,7 +1117,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, !bch2_journal_seq_is_blacklisted(c, le64_to_cpu(bne->keys.journal_seq), true), - -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, NULL, + -BCH_ERR_btree_node_read_err_want_retry, + c, ca, b, NULL, + btree_node_bset_after_end, "found bset signature after last bset"); } @@ -1097,7 +1161,10 @@ int 
bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, prt_printf(&buf, "\n "); bch2_bkey_val_to_text(&buf, c, u.s_c); - btree_err(-BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, "%s", buf.buf); + btree_err(-BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, i, + btree_node_bad_bkey, + "%s", buf.buf); btree_keys_account_key_drop(&b->nr, 0, k); @@ -1177,8 +1244,9 @@ static void btree_node_read_work(struct work_struct *work) } start: printbuf_reset(&buf); - btree_pos_to_text(&buf, c, b); - bch2_dev_io_err_on(bio->bi_status, ca, "btree read error %s for %s", + bch2_btree_pos_to_text(&buf, c, b); + bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read, + "btree read error %s for %s", bch2_blk_status_to_str(bio->bi_status), buf.buf); if (rb->have_ioref) percpu_ref_put(&ca->io_ref); @@ -1213,7 +1281,7 @@ start: printbuf_reset(&buf); bch2_bpos_to_text(&buf, b->key.k.p); bch_info(c, "%s: rewriting btree node at btree=%s level=%u %s due to error", - __func__, bch2_btree_ids[b->c.btree_id], b->c.level, buf.buf); + __func__, bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf); bch2_btree_node_rewrite_async(c, b); } @@ -1322,14 +1390,20 @@ static void btree_node_read_all_replicas_done(struct closure *cl) } written2 = btree_node_sectors_written(c, ra->buf[i]); - if (btree_err_on(written2 != written, -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, NULL, + if (btree_err_on(written2 != written, -BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, NULL, + btree_node_replicas_sectors_written_mismatch, "btree node sectors written mismatch: %u != %u", written, written2) || btree_err_on(btree_node_has_extra_bsets(c, written2, ra->buf[i]), - -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, NULL, + -BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, NULL, + btree_node_bset_after_end, "found bset signature after last bset") || btree_err_on(memcmp(ra->buf[best], ra->buf[i], written << 9), - -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, NULL, + -BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, NULL, + btree_node_replicas_data_mismatch, "btree node replicas content mismatch")) dump_bset_maps = true; @@ -1524,7 +1598,7 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b, struct printbuf buf = PRINTBUF; prt_str(&buf, "btree node read error: no device to read from\n at "); - btree_pos_to_text(&buf, c, b); + bch2_btree_pos_to_text(&buf, c, b); bch_err(c, "%s", buf.buf); if (c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology) && @@ -1759,7 +1833,8 @@ static void btree_node_write_endio(struct bio *bio) if (wbio->have_ioref) bch2_latency_acct(ca, wbio->submit_time, WRITE); - if (bch2_dev_io_err_on(bio->bi_status, ca, "btree write error: %s", + if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write, + "btree write error: %s", bch2_blk_status_to_str(bio->bi_status)) || bch2_meta_write_fault("btree")) { spin_lock_irqsave(&c->btree_write_error_lock, flags); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 1d79514754d7..c2adf3fbb0b3 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -257,7 +257,7 @@ static void bch2_btree_iter_verify(struct btree_iter *iter) BUG_ON(!(iter->flags & __BTREE_ITER_ALL_SNAPSHOTS) && (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) && - !btree_type_has_snapshots(iter->btree_id)); + !btree_type_has_snapshot_field(iter->btree_id)); if (iter->update_path) bch2_btree_path_verify(trans, iter->update_path); @@ -362,7 +362,7 @@ void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id, 
bch2_bpos_to_text(&buf, pos); panic("not locked: %s %s%s\n", - bch2_btree_ids[id], buf.buf, + bch2_btree_id_str(id), buf.buf, key_cache ? " cached" : ""); } @@ -1109,6 +1109,9 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans, if (unlikely(ret)) goto out; + if (unlikely(!trans->srcu_held)) + bch2_trans_srcu_lock(trans); + /* * Ensure we obey path->should_be_locked: if it's set, we can't unlock * and re-traverse the path without a transaction restart: @@ -1371,7 +1374,7 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) struct bkey_s_c old = { &i->old_k, i->old_v }; prt_printf(buf, "update: btree=%s cached=%u %pS", - bch2_btree_ids[i->btree_id], + bch2_btree_id_str(i->btree_id), i->cached, (void *) i->ip_allocated); prt_newline(buf); @@ -1387,7 +1390,7 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) trans_for_each_wb_update(trans, wb) { prt_printf(buf, "update: btree=%s wb=1 %pS", - bch2_btree_ids[wb->btree], + bch2_btree_id_str(wb->btree), (void *) i->ip_allocated); prt_newline(buf); @@ -1416,7 +1419,7 @@ void bch2_btree_path_to_text(struct printbuf *out, struct btree_path *path) path->idx, path->ref, path->intent_ref, path->preserve ? 'P' : ' ', path->should_be_locked ? 'S' : ' ', - bch2_btree_ids[path->btree_id], + bch2_btree_id_str(path->btree_id), path->level); bch2_bpos_to_text(out, path->pos); @@ -1523,6 +1526,7 @@ static inline struct btree_path *btree_path_alloc(struct btree_trans *trans, path->ref = 0; path->intent_ref = 0; path->nodes_locked = 0; + path->alloc_seq++; btree_path_list_add(trans, pos, path); trans->paths_sorted = false; @@ -1598,7 +1602,7 @@ struct btree_path *bch2_path_get(struct btree_trans *trans, locks_want = min(locks_want, BTREE_MAX_DEPTH); if (locks_want > path->locks_want) - bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want); + bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want, NULL); return path; } @@ -2829,18 +2833,36 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) return p; } -static noinline void bch2_trans_reset_srcu_lock(struct btree_trans *trans) +static inline void check_srcu_held_too_long(struct btree_trans *trans) { - struct bch_fs *c = trans->c; - struct btree_path *path; + WARN(trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10), + "btree trans held srcu lock (delaying memory reclaim) for %lu seconds", + (jiffies - trans->srcu_lock_time) / HZ); +} - trans_for_each_path(trans, path) - if (path->cached && !btree_node_locked(path, 0)) - path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset); +void bch2_trans_srcu_unlock(struct btree_trans *trans) +{ + if (trans->srcu_held) { + struct bch_fs *c = trans->c; + struct btree_path *path; - srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); - trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); - trans->srcu_lock_time = jiffies; + trans_for_each_path(trans, path) + if (path->cached && !btree_node_locked(path, 0)) + path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset); + + check_srcu_held_too_long(trans); + srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); + trans->srcu_held = false; + } +} + +void bch2_trans_srcu_lock(struct btree_trans *trans) +{ + if (!trans->srcu_held) { + trans->srcu_idx = srcu_read_lock(&trans->c->btree_trans_barrier); + trans->srcu_lock_time = jiffies; + trans->srcu_held = true; + } } /** @@ -2894,8 +2916,9 @@ u32 bch2_trans_begin(struct btree_trans *trans) } trans->last_begin_time = now; - if 
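/*
 * A sketch of the srcu_held guard introduced above, with stand-in primitives
 * (the real code uses srcu_read_lock()/srcu_read_unlock() on
 * c->btree_trans_barrier and jiffies for timestamps).  The shape is what
 * matters: lock and unlock are idempotent thanks to the srcu_held flag, the
 * unlock path invalidates cached node pointers the SRCU section was
 * protecting, and a warning fires if the read lock was held long enough to
 * delay memory reclaim.
 */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

struct trans {
	bool	srcu_held;
	time_t	srcu_lock_time;
	int	srcu_idx;
};

static int fake_srcu_read_lock(void) { return 0; }		/* stand-in */
static void fake_srcu_read_unlock(int idx) { (void) idx; }	/* stand-in */

static void trans_srcu_lock(struct trans *trans)
{
	if (!trans->srcu_held) {
		trans->srcu_idx = fake_srcu_read_lock();
		trans->srcu_lock_time = time(NULL);
		trans->srcu_held = true;
	}
}

static void trans_srcu_unlock(struct trans *trans)
{
	if (trans->srcu_held) {
		/* real code also drops cached btree node pointers here */
		if (time(NULL) - trans->srcu_lock_time > 10)
			fprintf(stderr, "srcu lock held too long, delaying reclaim\n");
		fake_srcu_read_unlock(trans->srcu_idx);
		trans->srcu_held = false;
	}
}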
(unlikely(time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10)))) - bch2_trans_reset_srcu_lock(trans); + if (unlikely(trans->srcu_held && + time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10)))) + bch2_trans_srcu_unlock(trans); trans->last_begin_ip = _RET_IP_; if (trans->restarted) { @@ -2980,8 +3003,9 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) trans->wb_updates_size = s->wb_updates_size; } - trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); + trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); trans->srcu_lock_time = jiffies; + trans->srcu_held = true; if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) { struct btree_trans *pos; @@ -3025,7 +3049,7 @@ leaked: trans_for_each_path(trans, path) if (path->ref) printk(KERN_ERR " btree %s %pS\n", - bch2_btree_ids[path->btree_id], + bch2_btree_id_str(path->btree_id), (void *) path->ip_allocated); /* Be noisy about this: */ bch2_fatal_error(c); @@ -3058,7 +3082,10 @@ void bch2_trans_put(struct btree_trans *trans) check_btree_paths_leaked(trans); - srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); + if (trans->srcu_held) { + check_srcu_held_too_long(trans); + srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); + } bch2_journal_preres_put(&c->journal, &trans->journal_preres); @@ -3100,7 +3127,7 @@ bch2_btree_bkey_cached_common_to_text(struct printbuf *out, prt_tab(out); prt_printf(out, "%px %c l=%u %s:", b, b->cached ? 'c' : 'b', - b->level, bch2_btree_ids[b->btree_id]); + b->level, bch2_btree_id_str(b->btree_id)); bch2_bpos_to_text(out, btree_node_pos(b)); prt_tab(out); @@ -3130,7 +3157,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) path->idx, path->cached ? 'c' : 'b', path->level, - bch2_btree_ids[path->btree_id]); + bch2_btree_id_str(path->btree_id)); bch2_bpos_to_text(out, path->pos); prt_newline(out); diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index fbe273453db3..85e7cb52f6b6 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -274,6 +274,7 @@ void bch2_path_put(struct btree_trans *, struct btree_path *, bool); int bch2_trans_relock(struct btree_trans *); int bch2_trans_relock_notrace(struct btree_trans *); void bch2_trans_unlock(struct btree_trans *); +void bch2_trans_unlock_long(struct btree_trans *); bool bch2_trans_locked(struct btree_trans *); static inline int trans_was_restarted(struct btree_trans *trans, u32 restart_count) @@ -411,11 +412,11 @@ static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans, flags |= BTREE_ITER_ALL_SNAPSHOTS|__BTREE_ITER_ALL_SNAPSHOTS; if (!(flags & (BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_NOT_EXTENTS)) && - btree_node_type_is_extents(btree_id)) + btree_id_is_extents(btree_id)) flags |= BTREE_ITER_IS_EXTENTS; if (!(flags & __BTREE_ITER_ALL_SNAPSHOTS) && - !btree_type_has_snapshots(btree_id)) + !btree_type_has_snapshot_field(btree_id)) flags &= ~BTREE_ITER_ALL_SNAPSHOTS; if (!(flags & BTREE_ITER_ALL_SNAPSHOTS) && @@ -579,6 +580,9 @@ static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans, __bch2_bkey_get_val_typed(_trans, _btree_id, _pos, _flags, \ KEY_TYPE_##_type, sizeof(*_val), _val) +void bch2_trans_srcu_unlock(struct btree_trans *); +void bch2_trans_srcu_lock(struct btree_trans *); + u32 bch2_trans_begin(struct btree_trans *); /* diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index f9a5e38a085b..9b78f78a75b5 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c 
@@ -324,7 +324,7 @@ btree_key_cache_create(struct btree_trans *trans, struct btree_path *path) ck = bkey_cached_reuse(bc); if (unlikely(!ck)) { bch_err(c, "error allocating memory for key cache item, btree %s", - bch2_btree_ids[path->btree_id]); + bch2_btree_id_str(path->btree_id)); return ERR_PTR(-BCH_ERR_ENOMEM_btree_key_cache_create); } @@ -407,7 +407,7 @@ static int btree_key_cache_fill(struct btree_trans *trans, new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL); if (!new_k) { bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u", - bch2_btree_ids[ck->key.btree_id], new_u64s); + bch2_btree_id_str(ck->key.btree_id), new_u64s); ret = -BCH_ERR_ENOMEM_btree_key_cache_fill; goto err; } @@ -509,7 +509,7 @@ fill: * path->uptodate yet: */ if (!path->locks_want && - !__bch2_btree_path_upgrade(trans, path, 1)) { + !__bch2_btree_path_upgrade(trans, path, 1, NULL)) { trace_and_count(trans->c, trans_restart_key_cache_upgrade, trans, _THIS_IP_); ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_upgrade); goto err; @@ -1038,7 +1038,7 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc) bc->table_init_done = true; - shrink = shrinker_alloc(0, "%s/btree_key_cache", c->name); + shrink = shrinker_alloc(0, "%s-btree_key_cache", c->name); if (!shrink) return -BCH_ERR_ENOMEM_fs_btree_cache_init; bc->shrink = shrink; diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index 40c8ed8f7bf1..3d48834d091f 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -431,7 +431,8 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *trans, static inline bool btree_path_get_locks(struct btree_trans *trans, struct btree_path *path, - bool upgrade) + bool upgrade, + struct get_locks_fail *f) { unsigned l = path->level; int fail_idx = -1; @@ -442,8 +443,14 @@ static inline bool btree_path_get_locks(struct btree_trans *trans, if (!(upgrade ? 
bch2_btree_node_upgrade(trans, path, l) - : bch2_btree_node_relock(trans, path, l))) - fail_idx = l; + : bch2_btree_node_relock(trans, path, l))) { + fail_idx = l; + + if (f) { + f->l = l; + f->b = path->l[l].b; + } + } l++; } while (l < path->locks_want); @@ -584,7 +591,9 @@ __flatten bool bch2_btree_path_relock_norestart(struct btree_trans *trans, struct btree_path *path, unsigned long trace_ip) { - return btree_path_get_locks(trans, path, false); + struct get_locks_fail f; + + return btree_path_get_locks(trans, path, false, &f); } int __bch2_btree_path_relock(struct btree_trans *trans, @@ -600,22 +609,24 @@ int __bch2_btree_path_relock(struct btree_trans *trans, bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *trans, struct btree_path *path, - unsigned new_locks_want) + unsigned new_locks_want, + struct get_locks_fail *f) { EBUG_ON(path->locks_want >= new_locks_want); path->locks_want = new_locks_want; - return btree_path_get_locks(trans, path, true); + return btree_path_get_locks(trans, path, true, f); } bool __bch2_btree_path_upgrade(struct btree_trans *trans, struct btree_path *path, - unsigned new_locks_want) + unsigned new_locks_want, + struct get_locks_fail *f) { struct btree_path *linked; - if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want)) + if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want, f)) return true; /* @@ -644,7 +655,7 @@ bool __bch2_btree_path_upgrade(struct btree_trans *trans, linked->btree_id == path->btree_id && linked->locks_want < new_locks_want) { linked->locks_want = new_locks_want; - btree_path_get_locks(trans, linked, true); + btree_path_get_locks(trans, linked, true, NULL); } return false; @@ -656,6 +667,9 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans, { unsigned l; + if (trans->restarted) + return; + EBUG_ON(path->locks_want < new_locks_want); path->locks_want = new_locks_want; @@ -674,6 +688,9 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans, } bch2_btree_path_verify_locks(path); + + path->downgrade_seq++; + trace_path_downgrade(trans, _RET_IP_, path); } /* Btree transaction locking: */ @@ -682,6 +699,9 @@ void bch2_trans_downgrade(struct btree_trans *trans) { struct btree_path *path; + if (trans->restarted) + return; + trans_for_each_path(trans, path) bch2_btree_path_downgrade(trans, path); } @@ -733,6 +753,12 @@ void bch2_trans_unlock(struct btree_trans *trans) __bch2_btree_path_unlock(trans, path); } +void bch2_trans_unlock_long(struct btree_trans *trans) +{ + bch2_trans_unlock(trans); + bch2_trans_srcu_unlock(trans); +} + bool bch2_trans_locked(struct btree_trans *trans) { struct btree_path *path; diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h index 6231e9ffc5d7..11b0a2c8cd69 100644 --- a/fs/bcachefs/btree_locking.h +++ b/fs/bcachefs/btree_locking.h @@ -355,26 +355,36 @@ static inline bool bch2_btree_node_relock_notrace(struct btree_trans *trans, /* upgrade */ + +struct get_locks_fail { + unsigned l; + struct btree *b; +}; + bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *, - struct btree_path *, unsigned); + struct btree_path *, unsigned, + struct get_locks_fail *); + bool __bch2_btree_path_upgrade(struct btree_trans *, - struct btree_path *, unsigned); + struct btree_path *, unsigned, + struct get_locks_fail *); static inline int bch2_btree_path_upgrade(struct btree_trans *trans, struct btree_path *path, unsigned new_locks_want) { + struct get_locks_fail f; unsigned old_locks_want = path->locks_want; new_locks_want = 
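/*
 * btree_path_get_locks() now threads a struct get_locks_fail out-param through
 * the relock/upgrade paths so callers and the trans_restart_upgrade tracepoint
 * can report which level and node failed.  A generic sketch of that shape,
 * with hypothetical names; try_lock_level() stands in for
 * bch2_btree_node_upgrade()/bch2_btree_node_relock():
 */
#include <stdbool.h>
#include <stddef.h>

struct lock_fail_info {
	unsigned	level;
	void		*node;
};

static bool try_lock_level(unsigned l) { return l != 2; }	/* stand-in */

static bool get_locks(unsigned level, unsigned locks_want,
		      struct lock_fail_info *f)
{
	int fail_idx = -1;
	unsigned l = level;

	do {
		if (!try_lock_level(l)) {
			fail_idx = l;
			if (f) {
				f->level = l;
				f->node = NULL;	/* real code records path->l[l].b */
			}
		}
		l++;
	} while (l < locks_want);

	return fail_idx < 0;
}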
min(new_locks_want, BTREE_MAX_DEPTH); if (path->locks_want < new_locks_want - ? __bch2_btree_path_upgrade(trans, path, new_locks_want) + ? __bch2_btree_path_upgrade(trans, path, new_locks_want, &f) : path->uptodate == BTREE_ITER_UPTODATE) return 0; trace_and_count(trans->c, trans_restart_upgrade, trans, _THIS_IP_, path, - old_locks_want, new_locks_want); + old_locks_want, new_locks_want, &f); return btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade); } diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 04c1f4610972..decad7b66c59 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -269,6 +269,7 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans, BUG_ON(i->level != i->path->level); BUG_ON(i->btree_id != i->path->btree_id); EBUG_ON(!i->level && + btree_type_has_snapshots(i->btree_id) && !(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) && test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) && i->k->k.p.snapshot && @@ -349,7 +350,7 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS); if (!new_k) { bch_err(c, "error allocating memory for key cache key, btree %s u64s %u", - bch2_btree_ids[path->btree_id], new_u64s); + bch2_btree_id_str(path->btree_id), new_u64s); return -BCH_ERR_ENOMEM_btree_key_cache_insert; } @@ -379,11 +380,10 @@ static int run_one_mem_trigger(struct btree_trans *trans, if (unlikely(flags & BTREE_TRIGGER_NORUN)) return 0; - if (!btree_node_type_needs_gc((enum btree_node_type) i->btree_id)) + if (!btree_node_type_needs_gc(__btree_node_type(i->level, i->btree_id))) return 0; - if (old_ops->atomic_trigger == new_ops->atomic_trigger && - ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) { + if (old_ops->atomic_trigger == new_ops->atomic_trigger) { ret = bch2_mark_key(trans, i->btree_id, i->level, old, bkey_i_to_s_c(new), BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags); @@ -425,8 +425,7 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_ if (!i->insert_trigger_run && !i->overwrite_trigger_run && - old_ops->trans_trigger == new_ops->trans_trigger && - ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) { + old_ops->trans_trigger == new_ops->trans_trigger) { i->overwrite_trigger_run = true; i->insert_trigger_run = true; return bch2_trans_mark_key(trans, i->btree_id, i->level, old, i->k, @@ -683,7 +682,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, BCH_JSET_ENTRY_overwrite, i->btree_id, i->level, i->old_k.u64s); - bkey_reassemble(&entry->start[0], + bkey_reassemble((struct bkey_i *) entry->start, (struct bkey_s_c) { &i->old_k, i->old_v }); } @@ -691,7 +690,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, BCH_JSET_ENTRY_btree_keys, i->btree_id, i->level, i->k->k.u64s); - bkey_copy(&entry->start[0], i->k); + bkey_copy((struct bkey_i *) entry->start, i->k); } trans_for_each_wb_update(trans, wb) { @@ -699,7 +698,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, BCH_JSET_ENTRY_btree_keys, wb->btree, 0, wb->k.k.u64s); - bkey_copy(&entry->start[0], &wb->k); + bkey_copy((struct bkey_i *) entry->start, &wb->k); } if (trans->journal_seq) @@ -776,12 +775,12 @@ static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans bch2_journal_key_overwritten(trans->c, wb->btree, 0, wb->k.k.p); } -static noinline int 
bch2_trans_commit_bkey_invalid(struct btree_trans *trans, unsigned flags, +static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, + enum bkey_invalid_flags flags, struct btree_insert_entry *i, struct printbuf *err) { struct bch_fs *c = trans->c; - int rw = (flags & BTREE_INSERT_JOURNAL_REPLAY) ? READ : WRITE; printbuf_reset(err); prt_printf(err, "invalid bkey on insert from %s -> %ps", @@ -792,8 +791,7 @@ static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, un bch2_bkey_val_to_text(err, c, bkey_i_to_s_c(i->k)); prt_newline(err); - bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), - i->bkey_type, rw, err); + bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type, flags, err); bch2_print_string_as_lines(KERN_ERR, err->buf); bch2_inconsistent_error(c); @@ -864,12 +862,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags */ bch2_journal_res_put(&c->journal, &trans->journal_res); - if (unlikely(ret)) - return ret; - - bch2_trans_downgrade(trans); - - return 0; + return ret; } static int journal_reclaim_wait_done(struct bch_fs *c) @@ -1034,7 +1027,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) if (unlikely(bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type, invalid_flags, &buf))) - ret = bch2_trans_commit_bkey_invalid(trans, flags, i, &buf); + ret = bch2_trans_commit_bkey_invalid(trans, invalid_flags, i, &buf); btree_insert_entry_checks(trans, i); printbuf_exit(&buf); @@ -1138,6 +1131,8 @@ out: if (likely(!(flags & BTREE_INSERT_NOCHECK_RW))) bch2_write_ref_put(c, BCH_WRITE_REF_trans); out_reset: + if (!ret) + bch2_trans_downgrade(trans); bch2_trans_reset_updates(trans); return ret; diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index bc6714d88925..941841a0c5bf 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -228,6 +228,8 @@ struct btree_path { u8 sorted_idx; u8 ref; u8 intent_ref; + u32 alloc_seq; + u32 downgrade_seq; /* btree_iter_copy starts here: */ struct bpos pos; @@ -424,6 +426,7 @@ struct btree_trans { u8 nr_updates; u8 nr_wb_updates; u8 wb_updates_size; + bool srcu_held:1; bool used_mempool:1; bool in_traverse_all:1; bool paths_sorted:1; @@ -636,16 +639,17 @@ static inline unsigned bset_byte_offset(struct btree *b, void *i) } enum btree_node_type { -#define x(kwd, val, ...) BKEY_TYPE_##kwd = val, + BKEY_TYPE_btree, +#define x(kwd, val, ...) BKEY_TYPE_##kwd = val + 1, BCH_BTREE_IDS() #undef x - BKEY_TYPE_btree, + BKEY_TYPE_NR }; /* Type of a key in btree @id at level @level: */ static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id) { - return level ? BKEY_TYPE_btree : (enum btree_node_type) id; + return level ? 
BKEY_TYPE_btree : (unsigned) id + 1; } /* Type of keys @b contains: */ @@ -654,19 +658,21 @@ static inline enum btree_node_type btree_node_type(struct btree *b) return __btree_node_type(b->c.level, b->c.btree_id); } +const char *bch2_btree_node_type_str(enum btree_node_type); + #define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \ - (BIT(BKEY_TYPE_extents)| \ - BIT(BKEY_TYPE_alloc)| \ - BIT(BKEY_TYPE_inodes)| \ - BIT(BKEY_TYPE_stripes)| \ - BIT(BKEY_TYPE_reflink)| \ - BIT(BKEY_TYPE_btree)) + (BIT_ULL(BKEY_TYPE_extents)| \ + BIT_ULL(BKEY_TYPE_alloc)| \ + BIT_ULL(BKEY_TYPE_inodes)| \ + BIT_ULL(BKEY_TYPE_stripes)| \ + BIT_ULL(BKEY_TYPE_reflink)| \ + BIT_ULL(BKEY_TYPE_btree)) #define BTREE_NODE_TYPE_HAS_MEM_TRIGGERS \ - (BIT(BKEY_TYPE_alloc)| \ - BIT(BKEY_TYPE_inodes)| \ - BIT(BKEY_TYPE_stripes)| \ - BIT(BKEY_TYPE_snapshots)) + (BIT_ULL(BKEY_TYPE_alloc)| \ + BIT_ULL(BKEY_TYPE_inodes)| \ + BIT_ULL(BKEY_TYPE_stripes)| \ + BIT_ULL(BKEY_TYPE_snapshots)) #define BTREE_NODE_TYPE_HAS_TRIGGERS \ (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \ @@ -674,13 +680,13 @@ static inline enum btree_node_type btree_node_type(struct btree *b) static inline bool btree_node_type_needs_gc(enum btree_node_type type) { - return BTREE_NODE_TYPE_HAS_TRIGGERS & (1U << type); + return BTREE_NODE_TYPE_HAS_TRIGGERS & BIT_ULL(type); } static inline bool btree_node_type_is_extents(enum btree_node_type type) { const unsigned mask = 0 -#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << nr) +#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1)) BCH_BTREE_IDS() #undef x ; @@ -690,7 +696,7 @@ static inline bool btree_node_type_is_extents(enum btree_node_type type) static inline bool btree_id_is_extents(enum btree_id btree) { - return btree_node_type_is_extents((enum btree_node_type) btree); + return btree_node_type_is_extents(__btree_node_type(0, btree)); } static inline bool btree_type_has_snapshots(enum btree_id id) @@ -704,6 +710,17 @@ static inline bool btree_type_has_snapshots(enum btree_id id) return (1U << id) & mask; } +static inline bool btree_type_has_snapshot_field(enum btree_id id) +{ + const unsigned mask = 0 +#define x(name, nr, flags, ...) 
|((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr) + BCH_BTREE_IDS() +#undef x + ; + + return (1U << id) & mask; +} + static inline bool btree_type_has_ptrs(enum btree_id id) { const unsigned mask = 0 diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 7dbf6b6c7f34..39c2db68123b 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1274,14 +1274,14 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, if (bch2_bkey_invalid(c, bkey_i_to_s_c(insert), btree_node_type(b), WRITE, &buf) ?: - bch2_bkey_in_btree_node(b, bkey_i_to_s_c(insert), &buf)) { + bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf)) { printbuf_reset(&buf); prt_printf(&buf, "inserting invalid bkey\n "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); prt_printf(&buf, "\n "); bch2_bkey_invalid(c, bkey_i_to_s_c(insert), btree_node_type(b), WRITE, &buf); - bch2_bkey_in_btree_node(b, bkey_i_to_s_c(insert), &buf); + bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf); bch2_fs_inconsistent(c, "%s", buf.buf); dump_stack(); @@ -1987,7 +1987,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, out: if (new_path) bch2_path_put(trans, new_path, true); - bch2_btree_path_downgrade(trans, iter->path); + bch2_trans_downgrade(trans); return ret; err: bch2_btree_node_free_never_used(as, trans, n); @@ -2411,30 +2411,24 @@ void bch2_journal_entry_to_btree_root(struct bch_fs *c, struct jset_entry *entry r->level = entry->level; r->alive = true; - bkey_copy(&r->key, &entry->start[0]); + bkey_copy(&r->key, (struct bkey_i *) entry->start); mutex_unlock(&c->btree_root_lock); } struct jset_entry * bch2_btree_roots_to_journal_entries(struct bch_fs *c, - struct jset_entry *start, - struct jset_entry *end) + struct jset_entry *end, + unsigned long skip) { - struct jset_entry *entry; - unsigned long have = 0; unsigned i; - for (entry = start; entry < end; entry = vstruct_next(entry)) - if (entry->type == BCH_JSET_ENTRY_btree_root) - __set_bit(entry->btree_id, &have); - mutex_lock(&c->btree_root_lock); for (i = 0; i < btree_id_nr_alive(c); i++) { struct btree_root *r = bch2_btree_id_root(c, i); - if (r->alive && !test_bit(i, &have)) { + if (r->alive && !test_bit(i, &skip)) { journal_entry_set(end, BCH_JSET_ENTRY_btree_root, i, r->level, &r->key, r->key.k.u64s); end = vstruct_next(end); diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index 5e0a467fe905..4df21512d640 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -271,7 +271,7 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, struct btree_node_entry *bne = max(write_block(b), (void *) btree_bkey_last(b, bset_tree_last(b))); ssize_t remaining_space = - __bch_btree_u64s_remaining(c, b, &bne->keys.start[0]); + __bch_btree_u64s_remaining(c, b, bne->keys.start); if (unlikely(bset_written(b, bset(b, t)))) { if (remaining_space > (ssize_t) (block_bytes(c) >> 3)) @@ -303,7 +303,7 @@ static inline void push_whiteout(struct bch_fs *c, struct btree *b, k.needs_whiteout = true; b->whiteout_u64s += k.u64s; - bkey_copy(unwritten_whiteouts_start(c, b), &k); + bkey_p_copy(unwritten_whiteouts_start(c, b), &k); } /* @@ -325,7 +325,7 @@ bool bch2_btree_interior_updates_flush(struct bch_fs *); void bch2_journal_entry_to_btree_root(struct bch_fs *, struct jset_entry *); struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *, - struct jset_entry *, struct 
jset_entry *); + struct jset_entry *, unsigned long); void bch2_do_pending_node_rewrites(struct bch_fs *); void bch2_free_pending_node_rewrites(struct bch_fs *); diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index a1a4b5feadaa..58d8c6ffd955 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -370,8 +370,8 @@ static inline int update_replicas(struct bch_fs *c, struct bkey_s_c k, idx = bch2_replicas_entry_idx(c, r); if (idx < 0 && - fsck_err(c, "no replicas entry\n" - " while marking %s", + fsck_err(c, ptr_to_missing_replicas_entry, + "no replicas entry\n while marking %s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { percpu_up_read(&c->mark_lock); ret = bch2_mark_replicas(c, r); @@ -695,6 +695,7 @@ static int check_bucket_ref(struct btree_trans *trans, if (gen_after(ptr->gen, b_gen)) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen, "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n" "while marking %s", ptr->dev, bucket_nr, b_gen, @@ -707,6 +708,7 @@ static int check_bucket_ref(struct btree_trans *trans, if (gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + BCH_FSCK_ERR_ptr_too_stale, "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" "while marking %s", ptr->dev, bucket_nr, b_gen, @@ -720,6 +722,7 @@ static int check_bucket_ref(struct btree_trans *trans, if (b_gen != ptr->gen && !ptr->cached) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + BCH_FSCK_ERR_stale_dirty_ptr, "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n" "while marking %s", ptr->dev, bucket_nr, b_gen, @@ -741,6 +744,7 @@ static int check_bucket_ref(struct btree_trans *trans, ptr_data_type && bucket_data_type != ptr_data_type) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + BCH_FSCK_ERR_ptr_bucket_data_type_mismatch, "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" "while marking %s", ptr->dev, bucket_nr, b_gen, @@ -754,6 +758,7 @@ static int check_bucket_ref(struct btree_trans *trans, if ((u64) bucket_sectors + sectors > U32_MAX) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + BCH_FSCK_ERR_bucket_sector_count_overflow, "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n" "while marking %s", ptr->dev, bucket_nr, b_gen, @@ -935,14 +940,12 @@ static int bch2_mark_stripe_ptr(struct btree_trans *trans, return 0; } -int bch2_mark_extent(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) +static int __mark_extent(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) { u64 journal_seq = trans->journal_res.seq; struct bch_fs *c = trans->c; - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? 
old : new; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; @@ -1018,6 +1021,14 @@ int bch2_mark_extent(struct btree_trans *trans, return 0; } +int bch2_mark_extent(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s_c new, + unsigned flags) +{ + return mem_trigger_run_overwrite_then_insert(__mark_extent, trans, btree_id, level, old, new, flags); +} + int bch2_mark_stripe(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c old, struct bkey_s_c new, @@ -1124,13 +1135,11 @@ int bch2_mark_stripe(struct btree_trans *trans, return 0; } -int bch2_mark_reservation(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) +static int __mark_reservation(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) { struct bch_fs *c = trans->c; - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new; struct bch_fs_usage *fs_usage; unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; s64 sectors = (s64) k.k->size; @@ -1157,6 +1166,14 @@ int bch2_mark_reservation(struct btree_trans *trans, return 0; } +int bch2_mark_reservation(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s_c new, + unsigned flags) +{ + return mem_trigger_run_overwrite_then_insert(__mark_reservation, trans, btree_id, level, old, new, flags); +} + static s64 __bch2_mark_reflink_p(struct btree_trans *trans, struct bkey_s_c_reflink_p p, u64 start, u64 end, @@ -1183,7 +1200,8 @@ static s64 __bch2_mark_reflink_p(struct btree_trans *trans, *idx = r->offset; return 0; not_found: - if (fsck_err(c, "pointer to missing indirect extent\n" + if (fsck_err(c, reflink_p_to_missing_reflink_v, + "pointer to missing indirect extent\n" " %s\n" " missing range %llu-%llu", (bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf), @@ -1211,13 +1229,11 @@ fsck_err: return ret; } -int bch2_mark_reflink_p(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) +static int __mark_reflink_p(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) { struct bch_fs *c = trans->c; - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new; struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); struct reflink_gc *ref; size_t l, r, m; @@ -1251,6 +1267,14 @@ int bch2_mark_reflink_p(struct btree_trans *trans, return ret; } +int bch2_mark_reflink_p(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s_c new, + unsigned flags) +{ + return mem_trigger_run_overwrite_then_insert(__mark_reflink_p, trans, btree_id, level, old, new, flags); +} + void bch2_trans_fs_usage_revert(struct btree_trans *trans, struct replicas_delta_list *deltas) { @@ -1298,7 +1322,7 @@ int bch2_trans_fs_usage_apply(struct btree_trans *trans, struct bch_fs *c = trans->c; static int warned_disk_usage = 0; bool warn = false; - unsigned disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0; + u64 disk_res_sectors = trans->disk_res ? 
trans->disk_res->sectors : 0; struct replicas_delta *d, *d2; struct replicas_delta *top = (void *) deltas->d + deltas->used; struct bch_fs_usage *dst; @@ -1357,7 +1381,7 @@ int bch2_trans_fs_usage_apply(struct btree_trans *trans, if (unlikely(warn) && !xchg(&warned_disk_usage, 1)) bch2_trans_inconsistent(trans, - "disk usage increased %lli more than %u sectors reserved)", + "disk usage increased %lli more than %llu sectors reserved)", should_not_have_added, disk_res_sectors); return 0; need_mark: @@ -1452,15 +1476,11 @@ err: return ret; } -int bch2_trans_mark_extent(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_i *new, - unsigned flags) +static int __trans_mark_extent(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) { struct bch_fs *c = trans->c; - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE - ? old - : bkey_i_to_s_c(new); struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; @@ -1517,6 +1537,24 @@ int bch2_trans_mark_extent(struct btree_trans *trans, return ret; } +int bch2_trans_mark_extent(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_i *new, + unsigned flags) +{ + struct bch_fs *c = trans->c; + int mod = (int) bch2_bkey_needs_rebalance(c, bkey_i_to_s_c(new)) - + (int) bch2_bkey_needs_rebalance(c, old); + + if (mod) { + int ret = bch2_btree_bit_mod(trans, BTREE_ID_rebalance_work, new->k.p, mod > 0); + if (ret) + return ret; + } + + return trigger_run_overwrite_then_insert(__trans_mark_extent, trans, btree_id, level, old, new, flags); +} + static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, struct bkey_s_c_stripe s, unsigned idx, bool deleting) @@ -1670,15 +1708,10 @@ int bch2_trans_mark_stripe(struct btree_trans *trans, return ret; } -int bch2_trans_mark_reservation(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, - struct bkey_i *new, - unsigned flags) +static int __trans_mark_reservation(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) { - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE - ? old - : bkey_i_to_s_c(new); unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; s64 sectors = (s64) k.k->size; struct replicas_delta_list *d; @@ -1700,7 +1733,16 @@ int bch2_trans_mark_reservation(struct btree_trans *trans, return 0; } -static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, +int bch2_trans_mark_reservation(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, + struct bkey_i *new, + unsigned flags) +{ + return trigger_run_overwrite_then_insert(__trans_mark_reservation, trans, btree_id, level, old, new, flags); +} + +static int trans_mark_reflink_p_segment(struct btree_trans *trans, struct bkey_s_c_reflink_p p, u64 *idx, unsigned flags) { @@ -1767,35 +1809,38 @@ err: return ret; } -int bch2_trans_mark_reflink_p(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, - struct bkey_i *new, - unsigned flags) +static int __trans_mark_reflink_p(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) { - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE - ? 
old - : bkey_i_to_s_c(new); struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); u64 idx, end_idx; int ret = 0; - if (flags & BTREE_TRIGGER_INSERT) { - struct bch_reflink_p *v = (struct bch_reflink_p *) p.v; - - v->front_pad = v->back_pad = 0; - } - idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad); end_idx = le64_to_cpu(p.v->idx) + p.k->size + le32_to_cpu(p.v->back_pad); while (idx < end_idx && !ret) - ret = __bch2_trans_mark_reflink_p(trans, p, &idx, flags); - + ret = trans_mark_reflink_p_segment(trans, p, &idx, flags); return ret; } +int bch2_trans_mark_reflink_p(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, + struct bkey_i *new, + unsigned flags) +{ + if (flags & BTREE_TRIGGER_INSERT) { + struct bch_reflink_p *v = &bkey_i_to_reflink_p(new)->v; + + v->front_pad = v->back_pad = 0; + } + + return trigger_run_overwrite_then_insert(__trans_mark_reflink_p, trans, btree_id, level, old, new, flags); +} + static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *ca, size_t b, enum bch_data_type type, @@ -1818,6 +1863,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, if (a->v.data_type && type && a->v.data_type != type) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + BCH_FSCK_ERR_bucket_metadata_type_mismatch, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" "while marking %s", iter.pos.inode, iter.pos.offset, a->v.gen, @@ -1825,16 +1871,16 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, bch2_data_types[type], bch2_data_types[type]); ret = -EIO; - goto out; + goto err; } - a->v.data_type = type; - a->v.dirty_sectors = sectors; - - ret = bch2_trans_update(trans, &iter, &a->k_i, 0); - if (ret) - goto out; -out: + if (a->v.data_type != type || + a->v.dirty_sectors != sectors) { + a->v.data_type = type; + a->v.dirty_sectors = sectors; + ret = bch2_trans_update(trans, &iter, &a->k_i, 0); + } +err: bch2_trans_iter_exit(trans, &iter); return ret; } @@ -1929,6 +1975,22 @@ int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca) return ret; } +int bch2_trans_mark_dev_sbs(struct bch_fs *c) +{ + struct bch_dev *ca; + unsigned i; + + for_each_online_member(ca, c, i) { + int ret = bch2_trans_mark_dev_sb(c, ca); + if (ret) { + percpu_ref_put(&ca->ref); + return ret; + } + } + + return 0; +} + /* Disk reservations: */ #define SECTORS_CACHE 1024 diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index bf8d7f407e9c..21f6cb356921 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -339,12 +339,27 @@ int bch2_trans_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); +#define mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\ +({ \ + int ret = 0; \ + \ + if (_old.k->type) \ + ret = _fn(_trans, _btree_id, _level, _old, _flags & ~BTREE_TRIGGER_INSERT); \ + if (!ret && _new.k->type) \ + ret = _fn(_trans, _btree_id, _level, _new, _flags & ~BTREE_TRIGGER_OVERWRITE); \ + ret; \ +}) + +#define trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags) \ + mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, bkey_i_to_s_c(_new), _flags) + void bch2_trans_fs_usage_revert(struct 
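/*
 * mem_trigger_run_overwrite_then_insert() (defined just above) replaces the
 * old "pick old or new based on BTREE_TRIGGER_OVERWRITE" dance: the
 * single-key helper now runs twice, first on the old key with the INSERT flag
 * masked off, then on the new key with OVERWRITE masked off.  A stand-alone
 * sketch of the control flow with invented types:
 */
#include <stdio.h>

#define TRIGGER_INSERT		(1 << 0)
#define TRIGGER_OVERWRITE	(1 << 1)

struct key { int type; };	/* type 0 means "no key" */

static int mark_one(const struct key *k, unsigned flags)
{
	printf("mark type=%d insert=%d overwrite=%d\n", k->type,
	       !!(flags & TRIGGER_INSERT), !!(flags & TRIGGER_OVERWRITE));
	return 0;
}

static int run_overwrite_then_insert(const struct key *old,
				     const struct key *new,
				     unsigned flags)
{
	int ret = 0;

	if (old->type)
		ret = mark_one(old, flags & ~TRIGGER_INSERT);
	if (!ret && new->type)
		ret = mark_one(new, flags & ~TRIGGER_OVERWRITE);
	return ret;
}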
btree_trans *, struct replicas_delta_list *); int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *); int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *, size_t, enum bch_data_type, unsigned); int bch2_trans_mark_dev_sb(struct bch_fs *, struct bch_dev *); +int bch2_trans_mark_dev_sbs(struct bch_fs *); static inline bool is_superblock_bucket(struct bch_dev *ca, u64 b) { diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index f69e15dc699c..4bb88aefed12 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -332,8 +332,8 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf, struct bch_ioctl_data_event e = { .type = BCH_DATA_EVENT_PROGRESS, .p.data_type = ctx->stats.data_type, - .p.btree_id = ctx->stats.btree_id, - .p.pos = ctx->stats.pos, + .p.btree_id = ctx->stats.pos.btree, + .p.pos = ctx->stats.pos.pos, .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen), .p.sectors_total = bch2_fs_usage_read_short(c).used, }; diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c index 1480b64547b0..a8b148ec2a2b 100644 --- a/fs/bcachefs/compress.c +++ b/fs/bcachefs/compress.c @@ -697,14 +697,32 @@ err: return ret; } +void bch2_compression_opt_to_text(struct printbuf *out, u64 v) +{ + struct bch_compression_opt opt = bch2_compression_decode(v); + + if (opt.type < BCH_COMPRESSION_OPT_NR) + prt_str(out, bch2_compression_opts[opt.type]); + else + prt_printf(out, "(unknown compression opt %u)", opt.type); + if (opt.level) + prt_printf(out, ":%u", opt.level); +} + void bch2_opt_compression_to_text(struct printbuf *out, struct bch_fs *c, struct bch_sb *sb, u64 v) { - struct bch_compression_opt opt = bch2_compression_decode(v); + return bch2_compression_opt_to_text(out, v); +} - prt_str(out, bch2_compression_opts[opt.type]); - if (opt.level) - prt_printf(out, ":%u", opt.level); +int bch2_opt_compression_validate(u64 v, struct printbuf *err) +{ + if (!bch2_compression_opt_valid(v)) { + prt_printf(err, "invalid compression opt %llu", v); + return -BCH_ERR_invalid_sb_opt_compression; + } + + return 0; } diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h index 052ea303241f..607fd5e232c9 100644 --- a/fs/bcachefs/compress.h +++ b/fs/bcachefs/compress.h @@ -4,12 +4,18 @@ #include "extents_types.h" +static const unsigned __bch2_compression_opt_to_type[] = { +#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t, + BCH_COMPRESSION_OPTS() +#undef x +}; + struct bch_compression_opt { u8 type:4, level:4; }; -static inline struct bch_compression_opt bch2_compression_decode(unsigned v) +static inline struct bch_compression_opt __bch2_compression_decode(unsigned v) { return (struct bch_compression_opt) { .type = v & 15, @@ -17,17 +23,25 @@ static inline struct bch_compression_opt bch2_compression_decode(unsigned v) }; } +static inline bool bch2_compression_opt_valid(unsigned v) +{ + struct bch_compression_opt opt = __bch2_compression_decode(v); + + return opt.type < ARRAY_SIZE(__bch2_compression_opt_to_type) && !(!opt.type && opt.level); +} + +static inline struct bch_compression_opt bch2_compression_decode(unsigned v) +{ + return bch2_compression_opt_valid(v) + ? 
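/*
 * The compression option packs a 4-bit type and a 4-bit level into one byte;
 * the bch2_compression_opt_valid() added above rejects unknown types and a
 * level supplied without a type.  A userspace sketch of the same packing,
 * with a made-up algorithm list:
 */
#include <stdbool.h>

enum { COMPRESS_none, COMPRESS_lz4, COMPRESS_gzip, COMPRESS_zstd, COMPRESS_NR };

struct compression_opt { unsigned type:4, level:4; };

static struct compression_opt compression_decode(unsigned v)
{
	return (struct compression_opt) { .type = v & 15, .level = v >> 4 };
}

static unsigned compression_encode(struct compression_opt opt)
{
	return opt.type | (opt.level << 4);
}

static bool compression_opt_valid(unsigned v)
{
	struct compression_opt opt = compression_decode(v);

	/* a level only makes sense together with an algorithm: */
	return opt.type < COMPRESS_NR && !(!opt.type && opt.level);
}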
__bch2_compression_decode(v) + : (struct bch_compression_opt) { 0 }; +} + static inline unsigned bch2_compression_encode(struct bch_compression_opt opt) { return opt.type|(opt.level << 4); } -static const unsigned __bch2_compression_opt_to_type[] = { -#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t, - BCH_COMPRESSION_OPTS() -#undef x -}; - static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v) { return __bch2_compression_opt_to_type[bch2_compression_decode(v).type]; @@ -44,12 +58,16 @@ int bch2_check_set_has_compressed_data(struct bch_fs *, unsigned); void bch2_fs_compress_exit(struct bch_fs *); int bch2_fs_compress_init(struct bch_fs *); +void bch2_compression_opt_to_text(struct printbuf *, u64); + int bch2_opt_compression_parse(struct bch_fs *, const char *, u64 *, struct printbuf *); void bch2_opt_compression_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64); +int bch2_opt_compression_validate(u64, struct printbuf *); #define bch2_opt_compression (struct bch_opt_fn) { \ - .parse = bch2_opt_compression_parse, \ - .to_text = bch2_opt_compression_to_text, \ + .parse = bch2_opt_compression_parse, \ + .to_text = bch2_opt_compression_to_text, \ + .validate = bch2_opt_compression_validate, \ } #endif /* _BCACHEFS_COMPRESS_H */ diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h index 114f86b45fd5..87b4b2d1ec76 100644 --- a/fs/bcachefs/darray.h +++ b/fs/bcachefs/darray.h @@ -69,9 +69,15 @@ static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more, _ret; \ }) +#define darray_remove_item(_d, _pos) \ + array_remove_item((_d)->data, (_d)->nr, (_pos) - (_d)->data) + #define darray_for_each(_d, _i) \ for (_i = (_d).data; _i < (_d).data + (_d).nr; _i++) +#define darray_for_each_reverse(_d, _i) \ + for (_i = (_d).data + (_d).nr - 1; _i >= (_d).data; --_i) + #define darray_init(_d) \ do { \ (_d)->data = NULL; \ diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 899ff46de8e0..0771a6d880bf 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -13,6 +13,7 @@ #include "keylist.h" #include "move.h" #include "nocow_locking.h" +#include "rebalance.h" #include "subvolume.h" #include "trace.h" @@ -161,11 +162,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, if (((1U << i) & m->data_opts.rewrite_ptrs) && (ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) && !ptr->cached) { - bch2_bkey_drop_ptr_noerror(bkey_i_to_s(insert), ptr); - /* - * See comment below: bch2_extent_ptr_set_cached(bkey_i_to_s(insert), ptr); - */ rewrites_found |= 1U << i; } i++; @@ -211,14 +208,8 @@ restart_drop_extra_replicas: if (!p.ptr.cached && durability - ptr_durability >= m->op.opts.data_replicas) { durability -= ptr_durability; - bch2_bkey_drop_ptr_noerror(bkey_i_to_s(insert), &entry->ptr); - /* - * Currently, we're dropping unneeded replicas - * instead of marking them as cached, since - * cached data in stripe buckets prevents them - * from being reused: + bch2_extent_ptr_set_cached(bkey_i_to_s(insert), &entry->ptr); - */ goto restart_drop_extra_replicas; } } @@ -251,11 +242,11 @@ restart_drop_extra_replicas: ret = bch2_insert_snapshot_whiteouts(trans, m->btree_id, k.k->p, bkey_start_pos(&insert->k)) ?: bch2_insert_snapshot_whiteouts(trans, m->btree_id, - k.k->p, insert->k.p); - if (ret) - goto err; - - ret = bch2_trans_update(trans, &iter, insert, + k.k->p, insert->k.p) ?: + bch2_bkey_set_needs_rebalance(c, insert, + op->opts.background_target, + 
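/*
 * darray_for_each_reverse() (added above) walks a darray from the last
 * element down, which is the safe direction when the loop body may call
 * darray_remove_item() on the current element.  A sketch of the idea with a
 * plain C array standing in for the darray:
 */
#include <stddef.h>
#include <string.h>

struct int_darray { int *data; size_t nr; };

static void remove_item(struct int_darray *d, int *pos)
{
	/* shift the tail down over the removed slot: */
	memmove(pos, pos + 1, (--d->nr - (pos - d->data)) * sizeof(*pos));
}

static void drop_negative(struct int_darray *d)
{
	/* iterating backwards means removal never skips the next element: */
	for (int *i = d->data + d->nr - 1; i >= d->data; --i)
		if (*i < 0)
			remove_item(d, i);
}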
op->opts.background_compression) ?: + bch2_trans_update(trans, &iter, insert, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: bch2_trans_commit(trans, &op->res, NULL, @@ -281,11 +272,11 @@ next: } continue; nowork: - if (m->ctxt && m->ctxt->stats) { + if (m->stats) { BUG_ON(k.k->p.offset <= iter.pos.offset); - atomic64_inc(&m->ctxt->stats->keys_raced); + atomic64_inc(&m->stats->keys_raced); atomic64_add(k.k->p.offset - iter.pos.offset, - &m->ctxt->stats->sectors_raced); + &m->stats->sectors_raced); } this_cpu_inc(c->counters[BCH_COUNTER_move_extent_fail]); @@ -439,6 +430,8 @@ int bch2_data_update_init(struct btree_trans *trans, bch2_bkey_buf_reassemble(&m->k, c, k); m->btree_id = btree_id; m->data_opts = data_opts; + m->ctxt = ctxt; + m->stats = ctxt ? ctxt->stats : NULL; bch2_write_op_init(&m->op, c, io_opts); m->op.pos = bkey_start_pos(k.k); @@ -487,7 +480,7 @@ int bch2_data_update_init(struct btree_trans *trans, if (c->opts.nocow_enabled) { if (ctxt) { - move_ctxt_wait_event(ctxt, trans, + move_ctxt_wait_event(ctxt, (locked = bch2_bucket_nocow_trylock(&c->nocow_locks, PTR_BUCKET_POS(c, &p.ptr), 0)) || !atomic_read(&ctxt->read_sectors)); diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h index 7ca1f98d7e94..9dc17b9d8379 100644 --- a/fs/bcachefs/data_update.h +++ b/fs/bcachefs/data_update.h @@ -23,6 +23,7 @@ struct data_update { struct bkey_buf k; struct data_update_opts data_opts; struct moving_context *ctxt; + struct bch_move_stats *stats; struct bch_write_op op; }; diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 75a3dc7cbd47..57c5128db173 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -517,7 +517,7 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs * prt_printf(out, "%px btree=%s l=%u ", b, - bch2_btree_ids[b->c.btree_id], + bch2_btree_id_str(b->c.btree_id), b->c.level); prt_newline(out); @@ -919,18 +919,18 @@ void bch2_fs_debug_init(struct bch_fs *c) bd < c->btree_debug + ARRAY_SIZE(c->btree_debug); bd++) { bd->id = bd - c->btree_debug; - debugfs_create_file(bch2_btree_ids[bd->id], + debugfs_create_file(bch2_btree_id_str(bd->id), 0400, c->btree_debug_dir, bd, &btree_debug_ops); snprintf(name, sizeof(name), "%s-formats", - bch2_btree_ids[bd->id]); + bch2_btree_id_str(bd->id)); debugfs_create_file(name, 0400, c->btree_debug_dir, bd, &btree_format_debug_ops); snprintf(name, sizeof(name), "%s-bfloat-failed", - bch2_btree_ids[bd->id]); + bch2_btree_id_str(bd->id)); debugfs_create_file(name, 0400, c->btree_debug_dir, bd, &bfloat_failed_debug_ops); diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 6c6c8d57d72b..1a0f2d571569 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -97,61 +97,51 @@ const struct bch_hash_desc bch2_dirent_hash_desc = { .is_visible = dirent_is_visible, }; -int bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); struct qstr d_name = bch2_dirent_get_name(d); + int ret = 0; - if (!d_name.len) { - prt_printf(err, "empty name"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(!d_name.len, c, err, + dirent_empty_name, + "empty name"); - if (bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len)) { - prt_printf(err, "value too big (%zu > %u)", - bkey_val_u64s(k.k), dirent_val_u64s(d_name.len)); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len),
c, err, + dirent_val_too_big, + "value too big (%zu > %u)", + bkey_val_u64s(k.k), dirent_val_u64s(d_name.len)); /* * Check new keys don't exceed the max length * (older keys may be larger.) */ - if ((flags & BKEY_INVALID_COMMIT) && d_name.len > BCH_NAME_MAX) { - prt_printf(err, "dirent name too big (%u > %u)", - d_name.len, BCH_NAME_MAX); - return -BCH_ERR_invalid_bkey; - } - - if (d_name.len != strnlen(d_name.name, d_name.len)) { - prt_printf(err, "dirent has stray data after name's NUL"); - return -BCH_ERR_invalid_bkey; - } - - if (d_name.len == 1 && !memcmp(d_name.name, ".", 1)) { - prt_printf(err, "invalid name"); - return -BCH_ERR_invalid_bkey; - } - - if (d_name.len == 2 && !memcmp(d_name.name, "..", 2)) { - prt_printf(err, "invalid name"); - return -BCH_ERR_invalid_bkey; - } - - if (memchr(d_name.name, '/', d_name.len)) { - prt_printf(err, "invalid name"); - return -BCH_ERR_invalid_bkey; - } - - if (d.v->d_type != DT_SUBVOL && - le64_to_cpu(d.v->d_inum) == d.k->p.inode) { - prt_printf(err, "dirent points to own directory"); - return -BCH_ERR_invalid_bkey; - } - - return 0; + bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) && d_name.len > BCH_NAME_MAX, c, err, + dirent_name_too_long, + "dirent name too big (%u > %u)", + d_name.len, BCH_NAME_MAX); + + bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), c, err, + dirent_name_embedded_nul, + "dirent has stray data after name's NUL"); + + bkey_fsck_err_on((d_name.len == 1 && !memcmp(d_name.name, ".", 1)) || + (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), c, err, + dirent_name_dot_or_dotdot, + "invalid name"); + + bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), c, err, + dirent_name_has_slash, + "name with /"); + + bkey_fsck_err_on(d.v->d_type != DT_SUBVOL && + le64_to_cpu(d.v->d_inum) == d.k->p.inode, c, err, + dirent_to_itself, + "dirent points to own directory"); +fsck_err: + return ret; } void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index e9fa1df38232..cd262bf4d9c5 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -7,7 +7,7 @@ enum bkey_invalid_flags; extern const struct bch_hash_desc bch2_dirent_hash_desc; -int bch2_dirent_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_dirent_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c index e00133b6ea51..d613695abf9f 100644 --- a/fs/bcachefs/disk_groups.c +++ b/fs/bcachefs/disk_groups.c @@ -175,6 +175,7 @@ int bch2_sb_disk_groups_to_cpu(struct bch_fs *c) dst->deleted = BCH_GROUP_DELETED(src); dst->parent = BCH_GROUP_PARENT(src); + memcpy(dst->label, src->label, sizeof(dst->label)); } for (i = 0; i < c->disk_sb.sb->nr_devices; i++) { @@ -382,7 +383,57 @@ int bch2_disk_path_find_or_create(struct bch_sb_handle *sb, const char *name) return v; } -void bch2_disk_path_to_text(struct printbuf *out, struct bch_sb *sb, unsigned v) +void bch2_disk_path_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) +{ + struct bch_disk_groups_cpu *groups; + struct bch_disk_group_cpu *g; + unsigned nr = 0; + u16 path[32]; + + out->atomic++; + rcu_read_lock(); + groups = rcu_dereference(c->disk_groups); + if (!groups) + goto invalid; + + while (1) { + if (nr == ARRAY_SIZE(path)) + goto invalid; + + if (v >= groups->nr) + goto invalid; + + g = groups->entries + v; + + if (g->deleted) + goto invalid; + + 
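/*
 * The dirent checks above are converted to bkey_fsck_err_on(), which couples
 * each check to a stable error-table id and jumps to a shared fsck_err label
 * rather than returning early.  A rough stand-in showing just the macro shape
 * (the real macro also feeds bch2_fsck_err() and the superblock error
 * counters, and relies on a "ret" variable in the enclosing scope):
 */
#include <stdio.h>

#define fsck_err_on(cond, err_id, ...)					\
do {									\
	if (cond) {							\
		fprintf(stderr, "fsck error %s: ", #err_id);		\
		fprintf(stderr, __VA_ARGS__);				\
		fprintf(stderr, "\n");					\
		ret = -1;						\
		goto fsck_err;						\
	}								\
} while (0)

static int check_name(const char *name, unsigned len)
{
	int ret = 0;

	fsck_err_on(!len, dirent_empty_name, "empty name");
	fsck_err_on(len == 1 && name[0] == '.', dirent_name_dot_or_dotdot,
		    "invalid name");
fsck_err:
	return ret;
}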
path[nr++] = v; + + if (!g->parent) + break; + + v = g->parent - 1; + } + + while (nr) { + v = path[--nr]; + g = groups->entries + v; + + prt_printf(out, "%.*s", (int) sizeof(g->label), g->label); + if (nr) + prt_printf(out, "."); + } +out: + rcu_read_unlock(); + out->atomic--; + return; +invalid: + prt_printf(out, "invalid label %u", v); + goto out; +} + +void bch2_disk_path_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v) { struct bch_sb_field_disk_groups *groups = bch2_sb_field_get(sb, disk_groups); @@ -493,10 +544,7 @@ int bch2_opt_target_parse(struct bch_fs *c, const char *val, u64 *res, return -EINVAL; } -void bch2_opt_target_to_text(struct printbuf *out, - struct bch_fs *c, - struct bch_sb *sb, - u64 v) +void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) { struct target t = target_decode(v); @@ -504,47 +552,69 @@ void bch2_opt_target_to_text(struct printbuf *out, case TARGET_NULL: prt_printf(out, "none"); break; - case TARGET_DEV: - if (c) { - struct bch_dev *ca; - - rcu_read_lock(); - ca = t.dev < c->sb.nr_devices - ? rcu_dereference(c->devs[t.dev]) - : NULL; - - if (ca && percpu_ref_tryget(&ca->io_ref)) { - prt_printf(out, "/dev/%pg", ca->disk_sb.bdev); - percpu_ref_put(&ca->io_ref); - } else if (ca) { - prt_printf(out, "offline device %u", t.dev); - } else { - prt_printf(out, "invalid device %u", t.dev); - } - - rcu_read_unlock(); + case TARGET_DEV: { + struct bch_dev *ca; + + rcu_read_lock(); + ca = t.dev < c->sb.nr_devices + ? rcu_dereference(c->devs[t.dev]) + : NULL; + + if (ca && percpu_ref_tryget(&ca->io_ref)) { + prt_printf(out, "/dev/%pg", ca->disk_sb.bdev); + percpu_ref_put(&ca->io_ref); + } else if (ca) { + prt_printf(out, "offline device %u", t.dev); } else { - struct bch_member m = bch2_sb_member_get(sb, t.dev); - - if (bch2_dev_exists(sb, t.dev)) { - prt_printf(out, "Device "); - pr_uuid(out, m.uuid.b); - prt_printf(out, " (%u)", t.dev); - } else { - prt_printf(out, "Bad device %u", t.dev); - } + prt_printf(out, "invalid device %u", t.dev); } + + rcu_read_unlock(); break; + } case TARGET_GROUP: - if (c) { - mutex_lock(&c->sb_lock); - bch2_disk_path_to_text(out, c->disk_sb.sb, t.group); - mutex_unlock(&c->sb_lock); + bch2_disk_path_to_text(out, c, t.group); + break; + default: + BUG(); + } +} + +void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v) +{ + struct target t = target_decode(v); + + switch (t.type) { + case TARGET_NULL: + prt_printf(out, "none"); + break; + case TARGET_DEV: { + struct bch_member m = bch2_sb_member_get(sb, t.dev); + + if (bch2_dev_exists(sb, t.dev)) { + prt_printf(out, "Device "); + pr_uuid(out, m.uuid.b); + prt_printf(out, " (%u)", t.dev); } else { - bch2_disk_path_to_text(out, sb, t.group); + prt_printf(out, "Bad device %u", t.dev); } break; + } + case TARGET_GROUP: + bch2_disk_path_to_text_sb(out, sb, t.group); + break; default: BUG(); } } + +void bch2_opt_target_to_text(struct printbuf *out, + struct bch_fs *c, + struct bch_sb *sb, + u64 v) +{ + if (c) + bch2_target_to_text(out, c, v); + else + bch2_target_to_text_sb(out, sb, v); +} diff --git a/fs/bcachefs/disk_groups.h b/fs/bcachefs/disk_groups.h index bd7711767fd4..441826fff224 100644 --- a/fs/bcachefs/disk_groups.h +++ b/fs/bcachefs/disk_groups.h @@ -2,6 +2,8 @@ #ifndef _BCACHEFS_DISK_GROUPS_H #define _BCACHEFS_DISK_GROUPS_H +#include "disk_groups_types.h" + extern const struct bch_sb_field_ops bch_sb_field_ops_disk_groups; static inline unsigned disk_groups_nr(struct bch_sb_field_disk_groups *groups) @@ -83,7 +85,10 
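/*
 * bch2_disk_path_to_text() above resolves a label by walking parent links
 * toward the root, recording each group index, then printing the collected
 * components in reverse with '.' separators.  The same two-pass walk in
 * miniature (here a negative parent marks the root, where the real code uses
 * a biased "parent - 1" encoding):
 */
#include <stdio.h>

struct group { const char *label; int parent; };	/* parent < 0: root */

static void path_to_text(const struct group *groups, unsigned nr, unsigned v)
{
	unsigned path[32], depth = 0;

	while (1) {
		if (depth == 32 || v >= nr)
			return;		/* corrupt: bail like "invalid label" */
		path[depth++] = v;
		if (groups[v].parent < 0)
			break;
		v = groups[v].parent;
	}

	/* unwind: last element collected is the root, so print it first */
	while (depth) {
		printf("%s", groups[path[--depth]].label);
		if (depth)
			printf(".");
	}
}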
@@ int bch2_disk_path_find(struct bch_sb_handle *, const char *); /* Exported for userspace bcachefs-tools: */ int bch2_disk_path_find_or_create(struct bch_sb_handle *, const char *); -void bch2_disk_path_to_text(struct printbuf *, struct bch_sb *, unsigned); +void bch2_disk_path_to_text(struct printbuf *, struct bch_fs *, unsigned); +void bch2_disk_path_to_text_sb(struct printbuf *, struct bch_sb *, unsigned); + +void bch2_target_to_text(struct printbuf *out, struct bch_fs *, unsigned); int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *, struct printbuf *); void bch2_opt_target_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64); diff --git a/fs/bcachefs/disk_groups_types.h b/fs/bcachefs/disk_groups_types.h new file mode 100644 index 000000000000..a54ef085b13d --- /dev/null +++ b/fs/bcachefs/disk_groups_types.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_DISK_GROUPS_TYPES_H +#define _BCACHEFS_DISK_GROUPS_TYPES_H + +struct bch_disk_group_cpu { + bool deleted; + u16 parent; + u8 label[BCH_SB_LABEL_SIZE]; + struct bch_devs_mask devs; +}; + +struct bch_disk_groups_cpu { + struct rcu_head rcu; + unsigned nr; + struct bch_disk_group_cpu entries[] __counted_by(nr); +}; + +#endif /* _BCACHEFS_DISK_GROUPS_TYPES_H */ diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 8646856e4539..875f7c5a6fca 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -105,29 +105,26 @@ struct ec_bio { /* Stripes btree keys: */ -int bch2_stripe_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_stripe_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; + int ret = 0; - if (bkey_eq(k.k->p, POS_MIN)) { - prt_printf(err, "stripe at POS_MIN"); - return -BCH_ERR_invalid_bkey; - } - - if (k.k->p.inode) { - prt_printf(err, "nonzero inode field"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_eq(k.k->p, POS_MIN) || + bpos_gt(k.k->p, POS(0, U32_MAX)), c, err, + stripe_pos_bad, + "stripe at bad pos"); - if (bkey_val_u64s(k.k) < stripe_val_u64s(s)) { - prt_printf(err, "incorrect value size (%zu < %u)", - bkey_val_u64s(k.k), stripe_val_u64s(s)); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_val_u64s(k.k) < stripe_val_u64s(s), c, err, + stripe_val_size_bad, + "incorrect value size (%zu < %u)", + bkey_val_u64s(k.k), stripe_val_u64s(s)); - return bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_invalid(c, k, flags, err); +fsck_err: + return ret; } void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c, @@ -153,6 +150,7 @@ void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c, prt_printf(out, " %u:%llu:%u", ptr->dev, b, offset); if (i < nr_data) prt_printf(out, "#%u", stripe_blockcount_get(s, i)); + prt_printf(out, " gen %u", ptr->gen); if (ptr_stale(ca, ptr)) prt_printf(out, " stale"); } @@ -306,16 +304,21 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf) struct bch_csum got = ec_block_checksum(buf, i, offset); if (bch2_crc_cmp(want, got)) { - struct printbuf buf2 = PRINTBUF; + struct printbuf err = PRINTBUF; + struct bch_dev *ca = bch_dev_bkey_exists(c, v->ptrs[i].dev); + + prt_printf(&err, "stripe checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)\n", + want.hi, want.lo, + got.hi, got.lo, + bch2_csum_types[v->csum_type]); + prt_printf(&err, " for %ps at %u of\n ", (void *) _RET_IP_, i); + bch2_bkey_val_to_text(&err, c, bkey_i_to_s_c(&buf->key)); + 
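/*
 * The new disk_groups_types.h declares its flexible array with
 * __counted_by(nr), letting FORTIFY/UBSAN bounds-check entries[] against the
 * runtime count.  A sketch of allocating such a structure; the kernel would
 * use struct_size() and kzalloc(), spelled out here in plain C:
 */
#include <stdlib.h>

struct entry { int x; };

struct table {
	unsigned nr;
	struct entry entries[];	/* kernel: __counted_by(nr) */
};

static struct table *table_alloc(unsigned nr)
{
	struct table *t = calloc(1, sizeof(*t) + nr * sizeof(t->entries[0]));

	if (t)
		t->nr = nr;	/* set the count before touching entries[] */
	return t;
}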
bch_err_ratelimited(ca, "%s", err.buf); + printbuf_exit(&err); - bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(&buf->key)); - - bch_err_ratelimited(c, - "stripe checksum error for %ps at %u:%u: csum type %u, expected %llx got %llx\n%s", - (void *) _RET_IP_, i, j, v->csum_type, - want.lo, got.lo, buf2.buf); - printbuf_exit(&buf2); clear_bit(i, buf->valid); + + bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); break; } @@ -373,7 +376,11 @@ static void ec_block_endio(struct bio *bio) struct bch_dev *ca = ec_bio->ca; struct closure *cl = bio->bi_private; - if (bch2_dev_io_err_on(bio->bi_status, ca, "erasure coding %s error: %s", + if (bch2_dev_io_err_on(bio->bi_status, ca, + bio_data_dir(bio) + ? BCH_MEMBER_ERROR_write + : BCH_MEMBER_ERROR_read, + "erasure coding %s error: %s", bio_data_dir(bio) ? "write" : "read", bch2_blk_status_to_str(bio->bi_status))) clear_bit(ec_bio->idx, ec_bio->buf->valid); @@ -474,14 +481,10 @@ err: return ret; } -static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe) -{ - return bch2_trans_run(c, get_stripe_key_trans(trans, idx, stripe)); -} - /* recovery read path: */ -int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) +int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio) { + struct bch_fs *c = trans->c; struct ec_stripe_buf *buf; struct closure cl; struct bch_stripe *v; @@ -496,7 +499,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) if (!buf) return -BCH_ERR_ENOMEM_ec_read_extent; - ret = get_stripe_key(c, rbio->pick.ec.idx, buf); + ret = lockrestart_do(trans, get_stripe_key_trans(trans, rbio->pick.ec.idx, buf)); if (ret) { bch_err_ratelimited(c, "error doing reconstruct read: error %i looking up stripe", ret); diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 966d165a3b66..7d0237c9819f 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -8,7 +8,7 @@ enum bkey_invalid_flags; -int bch2_stripe_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_stripe_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -199,7 +199,7 @@ struct ec_stripe_head { struct ec_stripe_new *s; }; -int bch2_ec_read_extent(struct bch_fs *, struct bch_read_bio *); +int bch2_ec_read_extent(struct btree_trans *, struct bch_read_bio *); void *bch2_writepoint_ec_buf(struct bch_fs *, struct write_point *); diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 7cc083776a2e..68a1a96bb7ca 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -3,6 +3,8 @@ #define _BCACHEFS_ERRCODE_H #define BCH_ERRCODES() \ + x(ERANGE, ERANGE_option_too_small) \ + x(ERANGE, ERANGE_option_too_big) \ x(ENOMEM, ENOMEM_stripe_buf) \ x(ENOMEM, ENOMEM_replicas_table) \ x(ENOMEM, ENOMEM_cpu_replicas) \ @@ -213,6 +215,8 @@ x(BCH_ERR_invalid_sb, invalid_sb_crypt) \ x(BCH_ERR_invalid_sb, invalid_sb_clean) \ x(BCH_ERR_invalid_sb, invalid_sb_quota) \ + x(BCH_ERR_invalid_sb, invalid_sb_errors) \ + x(BCH_ERR_invalid_sb, invalid_sb_opt_compression) \ x(BCH_ERR_invalid, invalid_bkey) \ x(BCH_ERR_operation_blocked, nocow_lock_blocked) \ x(EIO, btree_node_read_err) \ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 2a5af8872613..7b28d37922fd 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -56,8 +56,9 @@ void bch2_io_error_work(struct work_struct *work) up_write(&c->state_lock); } -void bch2_io_error(struct bch_dev *ca) +void bch2_io_error(struct bch_dev *ca, enum 
bch_member_error_type type) { + atomic64_inc(&ca->errors[type]); //queue_work(system_long_wq, &ca->io_error_work); } @@ -116,31 +117,34 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt) if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) return NULL; - list_for_each_entry(s, &c->fsck_errors, list) + list_for_each_entry(s, &c->fsck_error_msgs, list) if (s->fmt == fmt) { /* * move it to the head of the list: repeated fsck errors * are common */ - list_move(&s->list, &c->fsck_errors); + list_move(&s->list, &c->fsck_error_msgs); return s; } s = kzalloc(sizeof(*s), GFP_NOFS); if (!s) { - if (!c->fsck_alloc_err) + if (!c->fsck_alloc_msgs_err) bch_err(c, "kmalloc err, cannot ratelimit fsck errs"); - c->fsck_alloc_err = true; + c->fsck_alloc_msgs_err = true; return NULL; } INIT_LIST_HEAD(&s->list); s->fmt = fmt; - list_add(&s->list, &c->fsck_errors); + list_add(&s->list, &c->fsck_error_msgs); return s; } -int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) +int bch2_fsck_err(struct bch_fs *c, + enum bch_fsck_flags flags, + enum bch_sb_error_id err, + const char *fmt, ...) { struct fsck_err_state *s = NULL; va_list args; @@ -148,11 +152,13 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) struct printbuf buf = PRINTBUF, *out = &buf; int ret = -BCH_ERR_fsck_ignore; + bch2_sb_error_count(c, err); + va_start(args, fmt); prt_vprintf(out, fmt, args); va_end(args); - mutex_lock(&c->fsck_error_lock); + mutex_lock(&c->fsck_error_msgs_lock); s = fsck_err_get(c, fmt); if (s) { /* @@ -162,7 +168,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) */ if (s->last_msg && !strcmp(buf.buf, s->last_msg)) { ret = s->ret; - mutex_unlock(&c->fsck_error_lock); + mutex_unlock(&c->fsck_error_msgs_lock); printbuf_exit(&buf); return ret; } @@ -257,7 +263,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) 
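
With this change every fsck error carries a stable ID, and bch2_sb_error_count() records it before any formatting or ratelimiting, so the counts survive even when the message itself is suppressed. The call-site migration looks like this (the ID below is one that really appears in the fsck.c hunks further down):

	/* before: */
	if (fsck_err(c, "unreachable inode %llu:%u", inum, snapshot))
		/* ... repair ... */;

	/* after: the short name expands to BCH_FSCK_ERR_inode_unreachable */
	if (fsck_err(c, inode_unreachable,
		     "unreachable inode %llu:%u", inum, snapshot))
		/* ... repair ... */;
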
if (s) s->ret = ret; - mutex_unlock(&c->fsck_error_lock); + mutex_unlock(&c->fsck_error_msgs_lock); printbuf_exit(&buf); @@ -278,9 +284,9 @@ void bch2_flush_fsck_errs(struct bch_fs *c) { struct fsck_err_state *s, *n; - mutex_lock(&c->fsck_error_lock); + mutex_lock(&c->fsck_error_msgs_lock); - list_for_each_entry_safe(s, n, &c->fsck_errors, list) { + list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) { if (s->ratelimited && s->last_msg) bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg); @@ -289,5 +295,5 @@ void bch2_flush_fsck_errs(struct bch_fs *c) kfree(s); } - mutex_unlock(&c->fsck_error_lock); + mutex_unlock(&c->fsck_error_msgs_lock); } diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 7ce9540052e5..d167d65986e0 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -4,6 +4,7 @@ #include <linux/list.h> #include <linux/printk.h> +#include "sb-errors.h" struct bch_dev; struct bch_fs; @@ -101,18 +102,26 @@ struct fsck_err_state { char *last_msg; }; -#define FSCK_CAN_FIX (1 << 0) -#define FSCK_CAN_IGNORE (1 << 1) -#define FSCK_NEED_FSCK (1 << 2) -#define FSCK_NO_RATELIMIT (1 << 3) +enum bch_fsck_flags { + FSCK_CAN_FIX = 1 << 0, + FSCK_CAN_IGNORE = 1 << 1, + FSCK_NEED_FSCK = 1 << 2, + FSCK_NO_RATELIMIT = 1 << 3, +}; + +#define fsck_err_count(_c, _err) bch2_sb_err_count(_c, BCH_FSCK_ERR_##_err) -__printf(3, 4) __cold -int bch2_fsck_err(struct bch_fs *, unsigned, const char *, ...); +__printf(4, 5) __cold +int bch2_fsck_err(struct bch_fs *, + enum bch_fsck_flags, + enum bch_sb_error_id, + const char *, ...); void bch2_flush_fsck_errs(struct bch_fs *); -#define __fsck_err(c, _flags, msg, ...) \ +#define __fsck_err(c, _flags, _err_type, ...) \ ({ \ - int _ret = bch2_fsck_err(c, _flags, msg, ##__VA_ARGS__); \ + int _ret = bch2_fsck_err(c, _flags, BCH_FSCK_ERR_##_err_type, \ + __VA_ARGS__); \ \ if (_ret != -BCH_ERR_fsck_fix && \ _ret != -BCH_ERR_fsck_ignore) { \ @@ -127,26 +136,53 @@ void bch2_flush_fsck_errs(struct bch_fs *); /* XXX: mark in superblock that filesystem contains errors, if we ignore: */ -#define __fsck_err_on(cond, c, _flags, ...) \ - (unlikely(cond) ? __fsck_err(c, _flags, ##__VA_ARGS__) : false) +#define __fsck_err_on(cond, c, _flags, _err_type, ...) \ + (unlikely(cond) ? __fsck_err(c, _flags, _err_type, __VA_ARGS__) : false) + +#define need_fsck_err_on(cond, c, _err_type, ...) \ + __fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, _err_type, __VA_ARGS__) + +#define need_fsck_err(c, _err_type, ...) \ + __fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, _err_type, __VA_ARGS__) + +#define mustfix_fsck_err(c, _err_type, ...) \ + __fsck_err(c, FSCK_CAN_FIX, _err_type, __VA_ARGS__) + +#define mustfix_fsck_err_on(cond, c, _err_type, ...) \ + __fsck_err_on(cond, c, FSCK_CAN_FIX, _err_type, __VA_ARGS__) -#define need_fsck_err_on(cond, c, ...) \ - __fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__) +#define fsck_err(c, _err_type, ...) \ + __fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) -#define need_fsck_err(c, ...) \ - __fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__) +#define fsck_err_on(cond, c, _err_type, ...) \ + __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) -#define mustfix_fsck_err(c, ...) \ - __fsck_err(c, FSCK_CAN_FIX, ##__VA_ARGS__) +static inline void bch2_bkey_fsck_err(struct bch_fs *c, + struct printbuf *err_msg, + enum bch_sb_error_id err_type, + const char *fmt, ...) +{ + va_list args; -#define mustfix_fsck_err_on(cond, c, ...) 
\ - __fsck_err_on(cond, c, FSCK_CAN_FIX, ##__VA_ARGS__) + va_start(args, fmt); + prt_vprintf(err_msg, fmt, args); + va_end(args); -#define fsck_err(c, ...) \ - __fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__) +} -#define fsck_err_on(cond, c, ...) \ - __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__) +#define bkey_fsck_err(c, _err_msg, _err_type, ...) \ +do { \ + prt_printf(_err_msg, __VA_ARGS__); \ + bch2_sb_error_count(c, BCH_FSCK_ERR_##_err_type); \ + ret = -BCH_ERR_invalid_bkey; \ + goto fsck_err; \ +} while (0) + +#define bkey_fsck_err_on(cond, ...) \ +do { \ + if (unlikely(cond)) \ + bkey_fsck_err(__VA_ARGS__); \ +} while (0) /* * Fatal errors: these don't indicate a bug, but we can't continue running in RW @@ -179,26 +215,26 @@ do { \ void bch2_io_error_work(struct work_struct *); /* Does the error handling without logging a message */ -void bch2_io_error(struct bch_dev *); +void bch2_io_error(struct bch_dev *, enum bch_member_error_type); -#define bch2_dev_io_err_on(cond, ca, ...) \ +#define bch2_dev_io_err_on(cond, ca, _type, ...) \ ({ \ bool _ret = (cond); \ \ if (_ret) { \ bch_err_dev_ratelimited(ca, __VA_ARGS__); \ - bch2_io_error(ca); \ + bch2_io_error(ca, _type); \ } \ _ret; \ }) -#define bch2_dev_inum_io_err_on(cond, ca, ...) \ +#define bch2_dev_inum_io_err_on(cond, ca, _type, ...) \ ({ \ bool _ret = (cond); \ \ if (_ret) { \ bch_err_inum_offset_ratelimited(ca, __VA_ARGS__); \ - bch2_io_error(ca); \ + bch2_io_error(ca, _type); \ } \ _ret; \ }) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 1b25f84e4b9c..a864de231b69 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -13,6 +13,7 @@ #include "btree_iter.h" #include "buckets.h" #include "checksum.h" +#include "compress.h" #include "debug.h" #include "disk_groups.h" #include "error.h" @@ -162,17 +163,19 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, /* KEY_TYPE_btree_ptr: */ -int bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_btree_ptr_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (bkey_val_u64s(k.k) > BCH_REPLICAS_MAX) { - prt_printf(err, "value too big (%zu > %u)", - bkey_val_u64s(k.k), BCH_REPLICAS_MAX); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; + + bkey_fsck_err_on(bkey_val_u64s(k.k) > BCH_REPLICAS_MAX, c, err, + btree_ptr_val_too_big, + "value too big (%zu > %u)", bkey_val_u64s(k.k), BCH_REPLICAS_MAX); - return bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_invalid(c, k, flags, err); +fsck_err: + return ret; } void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, @@ -181,17 +184,20 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, bch2_bkey_ptrs_to_text(out, c, k); } -int bch2_btree_ptr_v2_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_btree_ptr_v2_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) { - prt_printf(err, "value too big (%zu > %zu)", - bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; + + bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX, c, err, + btree_ptr_v2_val_too_big, + "value too big (%zu > %zu)", + bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX); - return bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_invalid(c, k, flags, err); +fsck_err: + return ret; } void 
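
bkey_fsck_err_on() gives the converted ..._invalid() functions a single shape: initialize ret to 0, test each condition in order (a failing check counts the error, formats err, sets ret to -BCH_ERR_invalid_bkey and jumps), and return through the fsck_err label. A minimal sketch of that pattern, with one illustrative check borrowed from the inode code later in this series:

	int bch2_example_invalid(struct bch_fs *c, struct bkey_s_c k,
				 enum bkey_invalid_flags flags,
				 struct printbuf *err)
	{
		int ret = 0;

		bkey_fsck_err_on(k.k->p.inode, c, err,
				 inode_pos_inode_nonzero,
				 "nonzero k.p.inode");
	fsck_err:
		return ret;
	}
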
bch2_btree_ptr_v2_to_text(struct printbuf *out, struct bch_fs *c, @@ -372,19 +378,18 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) /* KEY_TYPE_reservation: */ -int bch2_reservation_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_reservation_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); + int ret = 0; - if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX) { - prt_printf(err, "invalid nr_replicas (%u)", - r.v->nr_replicas); - return -BCH_ERR_invalid_bkey; - } - - return 0; + bkey_fsck_err_on(!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX, c, err, + reservation_key_nr_replicas_invalid, + "invalid nr_replicas (%u)", r.v->nr_replicas); +fsck_err: + return ret; } void bch2_reservation_to_text(struct printbuf *out, struct bch_fs *c, @@ -757,18 +762,6 @@ static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs, return i; } -static void extent_entry_drop(struct bkey_s k, union bch_extent_entry *entry) -{ - union bch_extent_entry *next = extent_entry_next(entry); - - /* stripes have ptrs, but their layout doesn't work with this code */ - BUG_ON(k.k->type == KEY_TYPE_stripe); - - memmove_u64s_down(entry, next, - (u64 *) bkey_val_end(k) - (u64 *) next); - k.k->u64s -= (u64 *) next - (u64 *) entry; -} - /* * Returns pointer to the next entry after the one being dropped: */ @@ -992,10 +985,6 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; - struct bch_extent_crc_unpacked crc; - const struct bch_extent_ptr *ptr; - const struct bch_extent_stripe_ptr *ec; - struct bch_dev *ca; bool first = true; if (c) @@ -1006,9 +995,9 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, prt_printf(out, " "); switch (__extent_entry_type(entry)) { - case BCH_EXTENT_ENTRY_ptr: - ptr = entry_to_ptr(entry); - ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev] + case BCH_EXTENT_ENTRY_ptr: { + const struct bch_extent_ptr *ptr = entry_to_ptr(entry); + struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev] ? 
bch_dev_bkey_exists(c, ptr->dev) : NULL; @@ -1030,10 +1019,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, prt_printf(out, " stale"); } break; + } case BCH_EXTENT_ENTRY_crc32: case BCH_EXTENT_ENTRY_crc64: - case BCH_EXTENT_ENTRY_crc128: - crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); + case BCH_EXTENT_ENTRY_crc128: { + struct bch_extent_crc_unpacked crc = + bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress %s", crc.compressed_size, @@ -1042,12 +1033,26 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, bch2_csum_types[crc.csum_type], bch2_compression_types[crc.compression_type]); break; - case BCH_EXTENT_ENTRY_stripe_ptr: - ec = &entry->stripe_ptr; + } + case BCH_EXTENT_ENTRY_stripe_ptr: { + const struct bch_extent_stripe_ptr *ec = &entry->stripe_ptr; prt_printf(out, "ec: idx %llu block %u", (u64) ec->idx, ec->block); break; + } + case BCH_EXTENT_ENTRY_rebalance: { + const struct bch_extent_rebalance *r = &entry->rebalance; + + prt_str(out, "rebalance: target "); + if (c) + bch2_target_to_text(out, c, r->target); + else + prt_printf(out, "%u", r->target); + prt_str(out, " compression "); + bch2_compression_opt_to_text(out, r->compression); + break; + } default: prt_printf(out, "(invalid extent entry %.16llx)", *((u64 *) entry)); return; @@ -1057,7 +1062,7 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, } } -static int extent_ptr_invalid(const struct bch_fs *c, +static int extent_ptr_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, const struct bch_extent_ptr *ptr, @@ -1070,6 +1075,7 @@ static int extent_ptr_invalid(const struct bch_fs *c, u64 bucket; u32 bucket_offset; struct bch_dev *ca; + int ret = 0; if (!bch2_dev_exists2(c, ptr->dev)) { /* @@ -1080,41 +1086,33 @@ static int extent_ptr_invalid(const struct bch_fs *c, if (flags & BKEY_INVALID_WRITE) return 0; - prt_printf(err, "pointer to invalid device (%u)", ptr->dev); - return -BCH_ERR_invalid_bkey; + bkey_fsck_err(c, err, ptr_to_invalid_device, + "pointer to invalid device (%u)", ptr->dev); } ca = bch_dev_bkey_exists(c, ptr->dev); bkey_for_each_ptr(ptrs, ptr2) - if (ptr != ptr2 && ptr->dev == ptr2->dev) { - prt_printf(err, "multiple pointers to same device (%u)", ptr->dev); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, c, err, + ptr_to_duplicate_device, + "multiple pointers to same device (%u)", ptr->dev); bucket = sector_to_bucket_and_offset(ca, ptr->offset, &bucket_offset); - if (bucket >= ca->mi.nbuckets) { - prt_printf(err, "pointer past last bucket (%llu > %llu)", - bucket, ca->mi.nbuckets); - return -BCH_ERR_invalid_bkey; - } - - if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket)) { - prt_printf(err, "pointer before first bucket (%llu < %u)", - bucket, ca->mi.first_bucket); - return -BCH_ERR_invalid_bkey; - } - - if (bucket_offset + size_ondisk > ca->mi.bucket_size) { - prt_printf(err, "pointer spans multiple buckets (%u + %u > %u)", + bkey_fsck_err_on(bucket >= ca->mi.nbuckets, c, err, + ptr_after_last_bucket, + "pointer past last bucket (%llu > %llu)", bucket, ca->mi.nbuckets); + bkey_fsck_err_on(ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket), c, err, + ptr_before_first_bucket, + "pointer before first bucket (%llu < %u)", bucket, ca->mi.first_bucket); + bkey_fsck_err_on(bucket_offset + size_ondisk > ca->mi.bucket_size, c, err, + ptr_spans_multiple_buckets, + "pointer spans multiple 
buckets (%u + %u > %u)", bucket_offset, size_ondisk, ca->mi.bucket_size); - return -BCH_ERR_invalid_bkey; - } - - return 0; +fsck_err: + return ret; } -int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { @@ -1124,24 +1122,22 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k, unsigned size_ondisk = k.k->size; unsigned nonce = UINT_MAX; unsigned nr_ptrs = 0; - bool unwritten = false, have_ec = false, crc_since_last_ptr = false; - int ret; + bool have_written = false, have_unwritten = false, have_ec = false, crc_since_last_ptr = false; + int ret = 0; if (bkey_is_btree_ptr(k.k)) size_ondisk = btree_sectors(c); bkey_extent_entry_for_each(ptrs, entry) { - if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) { - prt_printf(err, "invalid extent entry type (got %u, max %u)", - __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX, c, err, + extent_ptrs_invalid_entry, + "invalid extent entry type (got %u, max %u)", + __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX); - if (bkey_is_btree_ptr(k.k) && - !extent_entry_is_ptr(entry)) { - prt_printf(err, "has non ptr field"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_is_btree_ptr(k.k) && + !extent_entry_is_ptr(entry), c, err, + btree_ptr_has_non_ptr, + "has non ptr field"); switch (extent_entry_type(entry)) { case BCH_EXTENT_ENTRY_ptr: @@ -1150,22 +1146,15 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k, if (ret) return ret; - if (nr_ptrs && unwritten != entry->ptr.unwritten) { - prt_printf(err, "extent with unwritten and written ptrs"); - return -BCH_ERR_invalid_bkey; - } - - if (k.k->type != KEY_TYPE_extent && entry->ptr.unwritten) { - prt_printf(err, "has unwritten ptrs"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(entry->ptr.cached && have_ec, c, err, + ptr_cached_and_erasure_coded, + "cached, erasure coded ptr"); - if (entry->ptr.cached && have_ec) { - prt_printf(err, "cached, erasure coded ptr"); - return -BCH_ERR_invalid_bkey; - } + if (!entry->ptr.unwritten) + have_written = true; + else + have_unwritten = true; - unwritten = entry->ptr.unwritten; have_ec = false; crc_since_last_ptr = false; nr_ptrs++; @@ -1175,72 +1164,77 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k, case BCH_EXTENT_ENTRY_crc128: crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); - if (crc.offset + crc.live_size > - crc.uncompressed_size) { - prt_printf(err, "checksum offset + key size > uncompressed size"); - return -BCH_ERR_invalid_bkey; - } - - size_ondisk = crc.compressed_size; - - if (!bch2_checksum_type_valid(c, crc.csum_type)) { - prt_printf(err, "invalid checksum type"); - return -BCH_ERR_invalid_bkey; - } - - if (crc.compression_type >= BCH_COMPRESSION_TYPE_NR) { - prt_printf(err, "invalid compression type"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, c, err, + ptr_crc_uncompressed_size_too_small, + "checksum offset + key size > uncompressed size"); + bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), c, err, + ptr_crc_csum_type_unknown, + "invalid checksum type"); + bkey_fsck_err_on(crc.compression_type >= BCH_COMPRESSION_TYPE_NR, c, err, + ptr_crc_compression_type_unknown, + "invalid compression type"); if 
(bch2_csum_type_is_encryption(crc.csum_type)) { if (nonce == UINT_MAX) nonce = crc.offset + crc.nonce; - else if (nonce != crc.offset + crc.nonce) { - prt_printf(err, "incorrect nonce"); - return -BCH_ERR_invalid_bkey; - } + else if (nonce != crc.offset + crc.nonce) + bkey_fsck_err(c, err, ptr_crc_nonce_mismatch, + "incorrect nonce"); } - if (crc_since_last_ptr) { - prt_printf(err, "redundant crc entry"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(crc_since_last_ptr, c, err, + ptr_crc_redundant, + "redundant crc entry"); crc_since_last_ptr = true; + + bkey_fsck_err_on(crc_is_encoded(crc) && + (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && + (flags & (BKEY_INVALID_WRITE|BKEY_INVALID_COMMIT)), c, err, + ptr_crc_uncompressed_size_too_big, + "too large encoded extent"); + + size_ondisk = crc.compressed_size; break; case BCH_EXTENT_ENTRY_stripe_ptr: - if (have_ec) { - prt_printf(err, "redundant stripe entry"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(have_ec, c, err, + ptr_stripe_redundant, + "redundant stripe entry"); have_ec = true; break; - case BCH_EXTENT_ENTRY_rebalance: + case BCH_EXTENT_ENTRY_rebalance: { + const struct bch_extent_rebalance *r = &entry->rebalance; + + if (!bch2_compression_opt_valid(r->compression)) { + struct bch_compression_opt opt = __bch2_compression_decode(r->compression); + prt_printf(err, "invalid compression opt %u:%u", + opt.type, opt.level); + return -BCH_ERR_invalid_bkey; + } break; } + } } - if (!nr_ptrs) { - prt_str(err, "no ptrs"); - return -BCH_ERR_invalid_bkey; - } - - if (nr_ptrs >= BCH_BKEY_PTRS_MAX) { - prt_str(err, "too many ptrs"); - return -BCH_ERR_invalid_bkey; - } - - if (crc_since_last_ptr) { - prt_printf(err, "redundant crc entry"); - return -BCH_ERR_invalid_bkey; - } - - if (have_ec) { - prt_printf(err, "redundant stripe entry"); - return -BCH_ERR_invalid_bkey; - } - - return 0; + bkey_fsck_err_on(!nr_ptrs, c, err, + extent_ptrs_no_ptrs, + "no ptrs"); + bkey_fsck_err_on(nr_ptrs > BCH_BKEY_PTRS_MAX, c, err, + extent_ptrs_too_many_ptrs, + "too many ptrs: %u > %u", nr_ptrs, BCH_BKEY_PTRS_MAX); + bkey_fsck_err_on(have_written && have_unwritten, c, err, + extent_ptrs_written_and_unwritten, + "extent with unwritten and written ptrs"); + bkey_fsck_err_on(k.k->type != KEY_TYPE_extent && have_unwritten, c, err, + extent_ptrs_unwritten, + "has unwritten ptrs"); + bkey_fsck_err_on(crc_since_last_ptr, c, err, + extent_ptrs_redundant_crc, + "redundant crc entry"); + bkey_fsck_err_on(have_ec, c, err, + extent_ptrs_redundant_stripe, + "redundant stripe entry"); +fsck_err: + return ret; } void bch2_ptr_swab(struct bkey_s k) @@ -1281,6 +1275,125 @@ void bch2_ptr_swab(struct bkey_s k) } } +const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + + bkey_extent_entry_for_each(ptrs, entry) + if (__extent_entry_type(entry) == BCH_EXTENT_ENTRY_rebalance) + return &entry->rebalance; + + return NULL; +} + +unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k, + unsigned target, unsigned compression) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + unsigned rewrite_ptrs = 0; + + if (compression) { + unsigned compression_type = bch2_compression_opt_to_type(compression); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + unsigned i = 0; + + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) { + rewrite_ptrs 
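
The new size cap compares crc.uncompressed_size, which is kept in 512-byte sectors, against c->opts.encoded_extent_max, which is kept in bytes; the >> 9 does the unit conversion. Worked out for a 64 KiB limit:

	unsigned encoded_extent_max_sectors = c->opts.encoded_extent_max >> 9;
	/* 65536 bytes >> 9 == 128 sectors */
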
= 0; + goto incompressible; + } + + if (!p.ptr.cached && p.crc.compression_type != compression_type) + rewrite_ptrs |= 1U << i; + i++; + } + } +incompressible: + if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) { + const struct bch_extent_ptr *ptr; + unsigned i = 0; + + bkey_for_each_ptr(ptrs, ptr) { + if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, target)) + rewrite_ptrs |= 1U << i; + i++; + } + } + + return rewrite_ptrs; +} + +bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k) +{ + const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); + + /* + * If it's an indirect extent, we don't delete the rebalance entry when + * done so that we know what options were applied - check if it still + * needs work done: + */ + if (r && + k.k->type == KEY_TYPE_reflink_v && + !bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression)) + r = NULL; + + return r != NULL; +} + +int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k, + unsigned target, unsigned compression) +{ + struct bkey_s k = bkey_i_to_s(_k); + struct bch_extent_rebalance *r; + bool needs_rebalance; + + if (!bkey_extent_is_direct_data(k.k)) + return 0; + + /* get existing rebalance entry: */ + r = (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c); + if (r) { + if (k.k->type == KEY_TYPE_reflink_v) { + /* + * indirect extents: existing options take precedence, + * so that we don't move extents back and forth if + * they're referenced by different inodes with different + * options: + */ + if (r->target) + target = r->target; + if (r->compression) + compression = r->compression; + } + + r->target = target; + r->compression = compression; + } + + needs_rebalance = bch2_bkey_ptrs_need_rebalance(c, k.s_c, target, compression); + + if (needs_rebalance && !r) { + union bch_extent_entry *new = bkey_val_end(k); + + new->rebalance.type = 1U << BCH_EXTENT_ENTRY_rebalance; + new->rebalance.compression = compression; + new->rebalance.target = target; + new->rebalance.unused = 0; + k.k->u64s += extent_entry_u64s(new); + } else if (!needs_rebalance && r && k.k->type != KEY_TYPE_reflink_v) { + /* + * For indirect extents, don't delete the rebalance entry when + * we're finished so that we know we specifically moved it or + * compressed it to its current location/compression type + */ + extent_entry_drop(k, (union bch_extent_entry *) r); + } + + return 0; +} + /* Generic extent code: */ int bch2_cut_front_s(struct bpos where, struct bkey_s k) diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 879e7d218b6a..a2ce8a3be13c 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -89,6 +89,18 @@ static inline void __extent_entry_insert(struct bkey_i *k, memcpy_u64s_small(dst, new, extent_entry_u64s(new)); } +static inline void extent_entry_drop(struct bkey_s k, union bch_extent_entry *entry) +{ + union bch_extent_entry *next = extent_entry_next(entry); + + /* stripes have ptrs, but their layout doesn't work with this code */ + BUG_ON(k.k->type == KEY_TYPE_stripe); + + memmove_u64s_down(entry, next, + (u64 *) bkey_val_end(k) - (u64 *) next); + k.k->u64s -= (u64 *) next - (u64 *) entry; +} + static inline bool extent_entry_is_ptr(const union bch_extent_entry *e) { return extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr; @@ -190,6 +202,11 @@ static inline bool crc_is_compressed(struct bch_extent_crc_unpacked crc) crc.compression_type != BCH_COMPRESSION_TYPE_incompressible); } +static inline bool crc_is_encoded(struct bch_extent_crc_unpacked crc) +{ + return 
crc.csum_type != BCH_CSUM_none || crc_is_compressed(crc); +} + /* bkey_ptrs: generically over any key type that has ptrs */ struct bkey_ptrs_c { @@ -383,12 +400,12 @@ int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, /* KEY_TYPE_btree_ptr: */ -int bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_btree_ptr_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_btree_ptr_v2_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_btree_ptr_v2_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_btree_ptr_v2_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, @@ -428,7 +445,7 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); /* KEY_TYPE_reservation: */ -int bch2_reservation_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_reservation_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); @@ -688,11 +705,19 @@ void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *); bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_bkey_ptrs_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_bkey_ptrs_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_ptr_swab(struct bkey_s); +const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c); +unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c, + unsigned, unsigned); +bool bch2_bkey_needs_rebalance(struct bch_fs *, struct bkey_s_c); + +int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bkey_i *, + unsigned, unsigned); + /* Generic extent code: */ enum bch_extent_overlap { @@ -737,22 +762,4 @@ static inline void bch2_key_resize(struct bkey *k, unsigned new_size) k->size = new_size; } -/* - * In extent_sort_fix_overlapping(), insert_fixup_extent(), - * extent_merge_inline() - we're modifying keys in place that are packed. To do - * that we have to unpack the key, modify the unpacked key - then this - * copies/repacks the unpacked to the original as necessary. 
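
bch2_bkey_ptrs_need_rebalance() returns a bitmask over the key's pointers: bit i is set when the i-th pointer is outside the target or compressed with the wrong type. An illustrative consumer (the queueing step is a placeholder):

	unsigned rewrite = bch2_bkey_ptrs_need_rebalance(c, k, target, compression);

	while (rewrite) {
		unsigned i = __ffs(rewrite);	/* lowest pointer index needing work */

		/* ... queue pointer i for rewrite ... */
		rewrite &= rewrite - 1;		/* clear that bit */
	}
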
- */ -static inline void extent_save(struct btree *b, struct bkey_packed *dst, - struct bkey *src) -{ - struct bkey_format *f = &b->format; - struct bkey_i *dst_unpacked; - - if ((dst_unpacked = packed_to_bkey(dst))) - dst_unpacked->k = *src; - else - BUG_ON(!bch2_bkey_pack_key(dst, src, f)); -} - #endif /* _BCACHEFS_EXTENTS_H */ diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c index bb5305441f27..4496cf91a4c1 100644 --- a/fs/bcachefs/fs-common.c +++ b/fs/bcachefs/fs-common.c @@ -51,7 +51,7 @@ int bch2_create_trans(struct btree_trans *trans, bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u); if (flags & BCH_CREATE_TMPFILE) - new_inode->bi_flags |= BCH_INODE_UNLINKED; + new_inode->bi_flags |= BCH_INODE_unlinked; ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu); if (ret) diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index 58ccc7b91ac7..52f0e7acda3d 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -389,6 +389,21 @@ static inline struct bch_writepage_state bch_writepage_state_init(struct bch_fs return ret; } +/* + * Determine when a writepage io is full. We have to limit writepage bios to a + * single page per bvec (i.e. 1MB with 4k pages) because that is the limit to + * what the bounce path in bch2_write_extent() can handle. In theory we could + * loosen this restriction for non-bounce I/O, but we don't have that context + * here. Ideally, we can up this limit and make it configurable in the future + * when the bounce path can be enhanced to accommodate larger source bios. + */ +static inline bool bch_io_full(struct bch_writepage_io *io, unsigned len) +{ + struct bio *bio = &io->op.wbio.bio; + return bio_full(bio, len) || + (bio->bi_iter.bi_size + len > BIO_MAX_VECS * PAGE_SIZE); +} + static void bch2_writepage_io_done(struct bch_write_op *op) { struct bch_writepage_io *io = @@ -606,9 +621,7 @@ do_io: if (w->io && (w->io->op.res.nr_replicas != nr_replicas_this_write || - bio_full(&w->io->op.wbio.bio, sectors << 9) || - w->io->op.wbio.bio.bi_iter.bi_size + (sectors << 9) >= - (BIO_MAX_VECS * PAGE_SIZE) || + bch_io_full(w->io, sectors << 9) || bio_end_sector(&w->io->op.wbio.bio) != sector)) bch2_writepage_do_io(w); diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c index 6a9557e7ecab..5b42a76c4796 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -113,6 +113,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) } else { atomic_set(&dio->cl.remaining, CLOSURE_REMAINING_INITIALIZER + 1); + dio->cl.closure_get_happened = true; } dio->req = req; diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 6040bd3f0778..5a39bcb597a3 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -45,13 +45,13 @@ static int bch2_inode_flags_set(struct btree_trans *trans, unsigned newflags = s->flags; unsigned oldflags = bi->bi_flags & s->mask; - if (((newflags ^ oldflags) & (BCH_INODE_APPEND|BCH_INODE_IMMUTABLE)) && + if (((newflags ^ oldflags) & (BCH_INODE_append|BCH_INODE_immutable)) && !capable(CAP_LINUX_IMMUTABLE)) return -EPERM; if (!S_ISREG(bi->bi_mode) && !S_ISDIR(bi->bi_mode) && - (newflags & (BCH_INODE_NODUMP|BCH_INODE_NOATIME)) != newflags) + (newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags) return -EINVAL; if (s->set_projinherit) { diff --git a/fs/bcachefs/fs-ioctl.h b/fs/bcachefs/fs-ioctl.h index 54a9c21a3b83..d30f9bb056fd 100644 --- a/fs/bcachefs/fs-ioctl.h +++ b/fs/bcachefs/fs-ioctl.h @@ 
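
The arithmetic behind bch_io_full()'s hard cap, with the usual constants: BIO_MAX_VECS is 256, so with 4 KiB pages a writepage bio tops out at 256 * 4096 = 1 MiB, the "1MB with 4k pages" figure quoted in the bch_io_full() comment above. A simplified use (the real caller folds this into a larger condition):

	if (w->io && bch_io_full(w->io, sectors << 9))
		bch2_writepage_do_io(w);	/* submit, then start a fresh io */
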
-6,28 +6,28 @@ /* bcachefs inode flags -> vfs inode flags: */ static const __maybe_unused unsigned bch_flags_to_vfs[] = { - [__BCH_INODE_SYNC] = S_SYNC, - [__BCH_INODE_IMMUTABLE] = S_IMMUTABLE, - [__BCH_INODE_APPEND] = S_APPEND, - [__BCH_INODE_NOATIME] = S_NOATIME, + [__BCH_INODE_sync] = S_SYNC, + [__BCH_INODE_immutable] = S_IMMUTABLE, + [__BCH_INODE_append] = S_APPEND, + [__BCH_INODE_noatime] = S_NOATIME, }; /* bcachefs inode flags -> FS_IOC_GETFLAGS: */ static const __maybe_unused unsigned bch_flags_to_uflags[] = { - [__BCH_INODE_SYNC] = FS_SYNC_FL, - [__BCH_INODE_IMMUTABLE] = FS_IMMUTABLE_FL, - [__BCH_INODE_APPEND] = FS_APPEND_FL, - [__BCH_INODE_NODUMP] = FS_NODUMP_FL, - [__BCH_INODE_NOATIME] = FS_NOATIME_FL, + [__BCH_INODE_sync] = FS_SYNC_FL, + [__BCH_INODE_immutable] = FS_IMMUTABLE_FL, + [__BCH_INODE_append] = FS_APPEND_FL, + [__BCH_INODE_nodump] = FS_NODUMP_FL, + [__BCH_INODE_noatime] = FS_NOATIME_FL, }; /* bcachefs inode flags -> FS_IOC_FSGETXATTR: */ static const __maybe_unused unsigned bch_flags_to_xflags[] = { - [__BCH_INODE_SYNC] = FS_XFLAG_SYNC, - [__BCH_INODE_IMMUTABLE] = FS_XFLAG_IMMUTABLE, - [__BCH_INODE_APPEND] = FS_XFLAG_APPEND, - [__BCH_INODE_NODUMP] = FS_XFLAG_NODUMP, - [__BCH_INODE_NOATIME] = FS_XFLAG_NOATIME, + [__BCH_INODE_sync] = FS_XFLAG_SYNC, + [__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE, + [__BCH_INODE_append] = FS_XFLAG_APPEND, + [__BCH_INODE_nodump] = FS_XFLAG_NODUMP, + [__BCH_INODE_noatime] = FS_XFLAG_NOATIME, //[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT; }; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index a2a5133fb6b5..166d8d8abe68 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -764,15 +764,15 @@ static int bch2_getattr(struct mnt_idmap *idmap, stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime); } - if (inode->ei_inode.bi_flags & BCH_INODE_IMMUTABLE) + if (inode->ei_inode.bi_flags & BCH_INODE_immutable) stat->attributes |= STATX_ATTR_IMMUTABLE; stat->attributes_mask |= STATX_ATTR_IMMUTABLE; - if (inode->ei_inode.bi_flags & BCH_INODE_APPEND) + if (inode->ei_inode.bi_flags & BCH_INODE_append) stat->attributes |= STATX_ATTR_APPEND; stat->attributes_mask |= STATX_ATTR_APPEND; - if (inode->ei_inode.bi_flags & BCH_INODE_NODUMP) + if (inode->ei_inode.bi_flags & BCH_INODE_nodump) stat->attributes |= STATX_ATTR_NODUMP; stat->attributes_mask |= STATX_ATTR_NODUMP; @@ -1213,9 +1213,6 @@ static struct dentry *bch2_get_parent(struct dentry *child) .inum = inode->ei_inode.bi_dir, }; - if (!parent_inum.inum) - return NULL; - return d_obtain_alias(bch2_vfs_inode_get(c, parent_inum)); } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index b8f9e7475dc5..9f3e9bd3d767 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -2,6 +2,7 @@ #include "bcachefs.h" #include "bkey_buf.h" +#include "btree_cache.h" #include "btree_update.h" #include "buckets.h" #include "darray.h" @@ -444,9 +445,10 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, if (i->equiv == n.equiv) { bch_err(c, "snapshot deletion did not finish:\n" " duplicate keys in btree %s at %llu:%llu snapshots %u, %u (equiv %u)\n", - bch2_btree_ids[btree_id], + bch2_btree_id_str(btree_id), pos.inode, pos.offset, i->id, n.id, n.equiv); + set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags); return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_delete_dead_snapshots); } } @@ -719,8 +721,9 @@ static int check_key_has_snapshot(struct btree_trans *trans, int ret = 0; if (mustfix_fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), c, - "key in missing 
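
The renamed tables are plain bit-index to foreign-flag maps, consumed by looping over set bits. A hedged sketch of that translation (this helper is illustrative, not the file's actual API):

	static unsigned map_flags(const unsigned table[], unsigned nr,
				  unsigned flags)
	{
		unsigned i, ret = 0;

		for (i = 0; i < nr; i++)
			if (flags & BIT(i))
				ret |= table[i];
		return ret;
	}

	/* e.g. map_flags(bch_flags_to_uflags,
	 *		  ARRAY_SIZE(bch_flags_to_uflags), bi_flags) */
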
snapshot: %s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) + bkey_in_missing_snapshot, + "key in missing snapshot: %s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: 1; fsck_err: @@ -789,6 +792,7 @@ static int hash_check_key(struct btree_trans *trans, if (fsck_err_on(k.k->type == desc.key_type && !desc.cmp_bkey(k, hash_k), c, + hash_table_key_duplicate, "duplicate hash table keys:\n%s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, hash_k), @@ -807,8 +811,9 @@ out: printbuf_exit(&buf); return ret; bad_hash: - if (fsck_err(c, "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s", - bch2_btree_ids[desc.btree_id], hash_k.k->p.inode, hash_k.k->p.offset, hash, + if (fsck_err(c, hash_table_key_wrong_offset, + "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s", + bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) { ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k); @@ -849,22 +854,23 @@ static int check_inode(struct btree_trans *trans, BUG_ON(bch2_inode_unpack(k, &u)); if (!full && - !(u.bi_flags & (BCH_INODE_I_SIZE_DIRTY| - BCH_INODE_I_SECTORS_DIRTY| - BCH_INODE_UNLINKED))) + !(u.bi_flags & (BCH_INODE_i_size_dirty| + BCH_INODE_i_sectors_dirty| + BCH_INODE_unlinked))) return 0; if (prev->bi_inum != u.bi_inum) *prev = u; if (fsck_err_on(prev->bi_hash_seed != u.bi_hash_seed || - inode_d_type(prev) != inode_d_type(&u), c, + inode_d_type(prev) != inode_d_type(&u), + c, inode_snapshot_mismatch, "inodes in different snapshots don't match")) { bch_err(c, "repair not implemented yet"); return -EINVAL; } - if ((u.bi_flags & (BCH_INODE_I_SIZE_DIRTY|BCH_INODE_UNLINKED)) && + if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) && bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) { struct bpos new_min_pos; @@ -872,7 +878,7 @@ static int check_inode(struct btree_trans *trans, if (ret) goto err; - u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY|BCH_INODE_UNLINKED; + u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked; ret = __write_inode(trans, &u, iter->pos.snapshot); bch_err_msg(c, ret, "in fsck updating inode"); @@ -884,9 +890,10 @@ static int check_inode(struct btree_trans *trans, return 0; } - if (u.bi_flags & BCH_INODE_UNLINKED && + if (u.bi_flags & BCH_INODE_unlinked && (!c->sb.clean || - fsck_err(c, "filesystem marked clean, but inode %llu unlinked", + fsck_err(c, inode_unlinked_but_clean, + "filesystem marked clean, but inode %llu unlinked", u.bi_inum))) { bch2_trans_unlock(trans); bch2_fs_lazy_rw(c); @@ -896,9 +903,10 @@ static int check_inode(struct btree_trans *trans, return ret; } - if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY && + if (u.bi_flags & BCH_INODE_i_size_dirty && (!c->sb.clean || - fsck_err(c, "filesystem marked clean, but inode %llu has i_size dirty", + fsck_err(c, inode_i_size_dirty_but_clean, + "filesystem marked clean, but inode %llu has i_size dirty", u.bi_inum))) { bch_verbose(c, "truncating inode %llu", u.bi_inum); @@ -922,15 +930,16 @@ static int check_inode(struct btree_trans *trans, * We truncated without our normal sector accounting hook, just * make sure we recalculate it: */ - u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY; + u.bi_flags |= BCH_INODE_i_sectors_dirty; - u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; + u.bi_flags &= ~BCH_INODE_i_size_dirty; do_update = true; } - if (u.bi_flags & 
BCH_INODE_I_SECTORS_DIRTY && + if (u.bi_flags & BCH_INODE_i_sectors_dirty && (!c->sb.clean || - fsck_err(c, "filesystem marked clean, but inode %llu has i_sectors dirty", + fsck_err(c, inode_i_sectors_dirty_but_clean, + "filesystem marked clean, but inode %llu has i_sectors dirty", u.bi_inum))) { s64 sectors; @@ -944,14 +953,14 @@ static int check_inode(struct btree_trans *trans, } u.bi_sectors = sectors; - u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY; + u.bi_flags &= ~BCH_INODE_i_sectors_dirty; do_update = true; } - if (u.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) { + if (u.bi_flags & BCH_INODE_backptr_untrusted) { u.bi_dir = 0; u.bi_dir_offset = 0; - u.bi_flags &= ~BCH_INODE_BACKPTR_UNTRUSTED; + u.bi_flags &= ~BCH_INODE_backptr_untrusted; do_update = true; } @@ -1056,10 +1065,11 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) return -BCH_ERR_internal_fsck_err; } - if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY), c, - "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu", - w->last_pos.inode, i->snapshot, - i->inode.bi_sectors, i->count)) { + if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty), + c, inode_i_sectors_wrong, + "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu", + w->last_pos.inode, i->snapshot, + i->inode.bi_sectors, i->count)) { i->inode.bi_sectors = i->count; ret = fsck_write_inode(trans, &i->inode, i->snapshot); if (ret) @@ -1200,7 +1210,8 @@ static int overlapping_extents_found(struct btree_trans *trans, prt_printf(&buf, "\n overwriting %s extent", pos1.snapshot >= pos2.p.snapshot ? "first" : "second"); - if (fsck_err(c, "overlapping extents%s", buf.buf)) { + if (fsck_err(c, extent_overlapping, + "overlapping extents%s", buf.buf)) { struct btree_iter *old_iter = &iter1; struct disk_reservation res = { 0 }; @@ -1297,6 +1308,28 @@ err: return ret; } +static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_s_c k) +{ + struct bch_fs *c = trans->c; + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + struct bch_extent_crc_unpacked crc; + const union bch_extent_entry *i; + unsigned encoded_extent_max_sectors = c->opts.encoded_extent_max >> 9; + + bkey_for_each_crc(k.k, ptrs, crc, i) + if (crc_is_encoded(crc) && + crc.uncompressed_size > encoded_extent_max_sectors) { + struct printbuf buf = PRINTBUF; + + bch2_bkey_val_to_text(&buf, c, k); + bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf); + printbuf_exit(&buf); + } + + return 0; +} + static int check_extent(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, struct inode_walker *inode, @@ -1333,7 +1366,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, goto err; if (k.k->type != KEY_TYPE_whiteout) { - if (fsck_err_on(!i, c, + if (fsck_err_on(!i, c, extent_in_missing_inode, "extent in missing inode:\n %s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf))) @@ -1341,7 +1374,8 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, if (fsck_err_on(i && !S_ISREG(i->inode.bi_mode) && - !S_ISLNK(i->inode.bi_mode), c, + !S_ISLNK(i->inode.bi_mode), + c, extent_in_non_reg_inode, "extent in non regular inode mode %o:\n %s", i->inode.bi_mode, (printbuf_reset(&buf), @@ -1371,9 +1405,10 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, continue; if (k.k->type != KEY_TYPE_whiteout) { - if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && + if 
(fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_size_dirty) && k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 && - !bkey_extent_is_reservation(k), c, + !bkey_extent_is_reservation(k), + c, extent_past_end_of_inode, "extent type past end of inode %llu:%u, i_size %llu\n %s", i->inode.bi_inum, i->snapshot, i->inode.bi_size, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { @@ -1432,7 +1467,8 @@ int bch2_check_extents(struct bch_fs *c) &res, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, ({ bch2_disk_reservation_put(c, &res); - check_extent(trans, &iter, k, &w, &s, &extent_ends); + check_extent(trans, &iter, k, &w, &s, &extent_ends) ?: + check_extent_overbig(trans, &iter, k); })) ?: check_i_sectors(trans, &w); @@ -1446,6 +1482,30 @@ int bch2_check_extents(struct bch_fs *c) return ret; } +int bch2_check_indirect_extents(struct bch_fs *c) +{ + struct btree_trans *trans = bch2_trans_get(c); + struct btree_iter iter; + struct bkey_s_c k; + struct disk_reservation res = { 0 }; + int ret = 0; + + ret = for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, + POS_MIN, + BTREE_ITER_PREFETCH, k, + &res, NULL, + BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, ({ + bch2_disk_reservation_put(c, &res); + check_extent_overbig(trans, &iter, k); + })); + + bch2_disk_reservation_put(c, &res); + bch2_trans_put(trans); + + bch_err_fn(c, ret); + return ret; +} + static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) { struct bch_fs *c = trans->c; @@ -1470,7 +1530,8 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) continue; } - if (fsck_err_on(i->inode.bi_nlink != i->count, c, + if (fsck_err_on(i->inode.bi_nlink != i->count, + c, inode_dir_wrong_nlink, "directory %llu:%u with wrong i_nlink: got %u, should be %llu", w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) { i->inode.bi_nlink = i->count; @@ -1514,27 +1575,28 @@ static int check_dirent_target(struct btree_trans *trans, backpointer_exists = ret; ret = 0; - if (fsck_err_on(S_ISDIR(target->bi_mode) && - backpointer_exists, c, + if (fsck_err_on(S_ISDIR(target->bi_mode) && backpointer_exists, + c, inode_dir_multiple_links, "directory %llu with multiple links", target->bi_inum)) { ret = __remove_dirent(trans, d.k->p); goto out; } - if (fsck_err_on(backpointer_exists && - !target->bi_nlink, c, + if (fsck_err_on(backpointer_exists && !target->bi_nlink, + c, inode_multiple_links_but_nlink_0, "inode %llu type %s has multiple links but i_nlink 0", target->bi_inum, bch2_d_types[d.v->d_type])) { target->bi_nlink++; - target->bi_flags &= ~BCH_INODE_UNLINKED; + target->bi_flags &= ~BCH_INODE_unlinked; ret = __write_inode(trans, target, target_snapshot); if (ret) goto err; } - if (fsck_err_on(!backpointer_exists, c, + if (fsck_err_on(!backpointer_exists, + c, inode_wrong_backpointer, "inode %llu:%u has wrong backpointer:\n" "got %llu:%llu\n" "should be %llu:%llu", @@ -1552,7 +1614,8 @@ static int check_dirent_target(struct btree_trans *trans, } } - if (fsck_err_on(d.v->d_type != inode_d_type(target), c, + if (fsck_err_on(d.v->d_type != inode_d_type(target), + c, dirent_d_type_wrong, "incorrect d_type: got %s, should be %s:\n%s", bch2_d_type_str(d.v->d_type), bch2_d_type_str(inode_d_type(target)), @@ -1576,7 +1639,8 @@ static int check_dirent_target(struct btree_trans *trans, if (d.v->d_type == DT_SUBVOL && target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol) && (c->sb.version < bcachefs_metadata_version_subvol_dirent || - fsck_err(c, "dirent has wrong d_parent_subvol field: got 
%u, should be %u", + fsck_err(c, dirent_d_parent_subvol_wrong, + "dirent has wrong d_parent_subvol field: got %u, should be %u", le32_to_cpu(d.v->d_parent_subvol), target->bi_parent_subvol))) { n = bch2_trans_kmalloc(trans, bkey_bytes(d.k)); @@ -1648,7 +1712,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, *hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode); dir->first_this_inode = false; - if (fsck_err_on(!i, c, + if (fsck_err_on(!i, c, dirent_in_missing_dir_inode, "dirent in nonexisting directory:\n%s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { @@ -1660,7 +1724,8 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (!i) goto out; - if (fsck_err_on(!S_ISDIR(i->inode.bi_mode), c, + if (fsck_err_on(!S_ISDIR(i->inode.bi_mode), + c, dirent_in_non_dir_inode, "dirent in non directory inode type %s:\n%s", bch2_d_type_str(inode_d_type(&i->inode)), (printbuf_reset(&buf), @@ -1694,7 +1759,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (ret && !bch2_err_matches(ret, ENOENT)) goto err; - if (fsck_err_on(ret, c, + if (fsck_err_on(ret, c, dirent_to_missing_subvol, "dirent points to missing subvolume %u", le32_to_cpu(d.v->d_child_subvol))) { ret = __remove_dirent(trans, d.k->p); @@ -1706,7 +1771,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (ret && !bch2_err_matches(ret, ENOENT)) goto err; - if (fsck_err_on(ret, c, + if (fsck_err_on(ret, c, subvol_to_missing_root, "subvolume %u points to missing subvolume root %llu", target_subvol, target_inum)) { @@ -1715,7 +1780,8 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, goto err; } - if (fsck_err_on(subvol_root.bi_subvol != target_subvol, c, + if (fsck_err_on(subvol_root.bi_subvol != target_subvol, + c, subvol_root_wrong_bi_subvol, "subvol root %llu has wrong bi_subvol field: got %u, should be %u", target_inum, subvol_root.bi_subvol, target_subvol)) { @@ -1734,7 +1800,8 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (ret) goto err; - if (fsck_err_on(!target->inodes.nr, c, + if (fsck_err_on(!target->inodes.nr, + c, dirent_to_missing_inode, "dirent points to missing inode: (equiv %u)\n%s", equiv.snapshot, (printbuf_reset(&buf), @@ -1820,7 +1887,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, *hash_info = bch2_hash_info_init(c, &inode->inodes.data[0].inode); inode->first_this_inode = false; - if (fsck_err_on(!i, c, + if (fsck_err_on(!i, c, xattr_in_missing_inode, "xattr for missing inode %llu", k.k->p.inode)) return bch2_btree_delete_at(trans, iter, 0); @@ -1869,7 +1936,8 @@ static int check_root_trans(struct btree_trans *trans) if (ret && !bch2_err_matches(ret, ENOENT)) return ret; - if (mustfix_fsck_err_on(ret, c, "root subvol missing")) { + if (mustfix_fsck_err_on(ret, c, root_subvol_missing, + "root subvol missing")) { struct bkey_i_subvolume root_subvol; snapshot = U32_MAX; @@ -1895,8 +1963,10 @@ static int check_root_trans(struct btree_trans *trans) if (ret && !bch2_err_matches(ret, ENOENT)) return ret; - if (mustfix_fsck_err_on(ret, c, "root directory missing") || - mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode), c, + if (mustfix_fsck_err_on(ret, c, root_dir_missing, + "root directory missing") || + mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode), + c, root_inode_not_dir, "root inode not a directory")) { bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, 0, NULL); @@ -2000,7 +2070,8 
@@ static int check_path(struct btree_trans *trans, } if (bch2_err_matches(ret, ENOENT)) { - if (fsck_err(c, "unreachable inode %llu:%u, type %s nlink %u backptr %llu:%llu", + if (fsck_err(c, inode_unreachable, + "unreachable inode %llu:%u, type %s nlink %u backptr %llu:%llu", inode->bi_inum, snapshot, bch2_d_type_str(inode_d_type(inode)), inode->bi_nlink, @@ -2040,7 +2111,8 @@ static int check_path(struct btree_trans *trans, pr_err("%llu:%u", i->inum, i->snapshot); pr_err("%llu:%u", inode->bi_inum, snapshot); - if (!fsck_err(c, "directory structure loop")) + if (!fsck_err(c, dir_loop, + "directory structure loop")) return 0; ret = commit_do(trans, NULL, NULL, @@ -2088,7 +2160,7 @@ int bch2_check_directory_structure(struct bch_fs *c) break; } - if (u.bi_flags & BCH_INODE_UNLINKED) + if (u.bi_flags & BCH_INODE_unlinked) continue; ret = check_path(trans, &path, &u, iter.pos.snapshot); @@ -2300,7 +2372,8 @@ static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_ite link = &links->d[++*idx]; } - if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, c, + if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, + c, inode_wrong_nlink, "inode %llu type %s has wrong i_nlink (%u, should be %u)", u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)], bch2_inode_nlink_get(&u), link->count)) { diff --git a/fs/bcachefs/fsck.h b/fs/bcachefs/fsck.h index 90c87b5089a0..da991e8cf27e 100644 --- a/fs/bcachefs/fsck.h +++ b/fs/bcachefs/fsck.h @@ -4,6 +4,7 @@ int bch2_check_inodes(struct bch_fs *); int bch2_check_extents(struct bch_fs *); +int bch2_check_indirect_extents(struct bch_fs *); int bch2_check_dirents(struct bch_fs *); int bch2_check_xattrs(struct bch_fs *); int bch2_check_root(struct bch_fs *); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index bb3f443d8381..def77f2d8802 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -6,6 +6,7 @@ #include "bkey_methods.h" #include "btree_update.h" #include "buckets.h" +#include "compress.h" #include "error.h" #include "extents.h" #include "extent_update.h" @@ -19,13 +20,18 @@ #include <asm/unaligned.h> -const char * const bch2_inode_opts[] = { #define x(name, ...) 
#name, +const char * const bch2_inode_opts[] = { BCH_INODE_OPTS() -#undef x NULL, }; +static const char * const bch2_inode_flag_strs[] = { + BCH_INODE_FLAGS() + NULL +}; +#undef x + static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 }; static int inode_decode_field(const u8 *in, const u8 *end, @@ -361,9 +367,10 @@ int bch2_inode_peek(struct btree_trans *trans, return ret; } -int bch2_inode_write(struct btree_trans *trans, +int bch2_inode_write_flags(struct btree_trans *trans, struct btree_iter *iter, - struct bch_inode_unpacked *inode) + struct bch_inode_unpacked *inode, + enum btree_update_flags flags) { struct bkey_inode_buf *inode_p; @@ -373,7 +380,7 @@ int bch2_inode_write(struct btree_trans *trans, bch2_inode_pack_inlined(inode_p, inode); inode_p->inode.k.p.snapshot = iter->snapshot; - return bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0); + return bch2_trans_update(trans, iter, &inode_p->inode.k_i, flags); } struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k) @@ -397,117 +404,121 @@ struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k) return &inode_p->inode.k_i; } -static int __bch2_inode_invalid(struct bkey_s_c k, struct printbuf *err) +static int __bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, struct printbuf *err) { struct bch_inode_unpacked unpacked; + int ret = 0; - if (k.k->p.inode) { - prt_printf(err, "nonzero k.p.inode"); - return -BCH_ERR_invalid_bkey; - } - - if (k.k->p.offset < BLOCKDEV_INODE_MAX) { - prt_printf(err, "fs inode in blockdev range"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(k.k->p.inode, c, err, + inode_pos_inode_nonzero, + "nonzero k.p.inode"); - if (bch2_inode_unpack(k, &unpacked)) { - prt_printf(err, "invalid variable length fields"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(k.k->p.offset < BLOCKDEV_INODE_MAX, c, err, + inode_pos_blockdev_range, + "fs inode in blockdev range"); - if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1) { - prt_printf(err, "invalid data checksum type (%u >= %u", - unpacked.bi_data_checksum, BCH_CSUM_OPT_NR + 1); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bch2_inode_unpack(k, &unpacked), c, err, + inode_unpack_error, + "invalid variable length fields"); - if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1) { - prt_printf(err, "invalid data checksum type (%u >= %u)", - unpacked.bi_compression, BCH_COMPRESSION_OPT_NR + 1); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1, c, err, + inode_checksum_type_invalid, + "invalid data checksum type (%u >= %u", + unpacked.bi_data_checksum, BCH_CSUM_OPT_NR + 1); - if ((unpacked.bi_flags & BCH_INODE_UNLINKED) && - unpacked.bi_nlink != 0) { - prt_printf(err, "flagged as unlinked but bi_nlink != 0"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(unpacked.bi_compression && + !bch2_compression_opt_valid(unpacked.bi_compression - 1), c, err, + inode_compression_type_invalid, + "invalid compression opt %u", unpacked.bi_compression - 1); - if (unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode)) { - prt_printf(err, "subvolume root but not a directory"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on((unpacked.bi_flags & BCH_INODE_unlinked) && + unpacked.bi_nlink != 0, c, err, + inode_unlinked_but_nlink_nonzero, + "flagged as unlinked but bi_nlink != 0"); - return 0; + bkey_fsck_err_on(unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode), c, err, + inode_subvol_root_but_not_dir, + "subvolume root but not a 
directory"); +fsck_err: + return ret; } -int bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); + int ret = 0; - if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR) { - prt_printf(err, "invalid str hash type (%llu >= %u)", - INODE_STR_HASH(inode.v), BCH_STR_HASH_NR); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, + inode_str_hash_invalid, + "invalid str hash type (%llu >= %u)", + INODE_STR_HASH(inode.v), BCH_STR_HASH_NR); - return __bch2_inode_invalid(k, err); + ret = __bch2_inode_invalid(c, k, err); +fsck_err: + return ret; } -int bch2_inode_v2_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_inode_v2_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k); + int ret = 0; - if (INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR) { - prt_printf(err, "invalid str hash type (%llu >= %u)", - INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, + inode_str_hash_invalid, + "invalid str hash type (%llu >= %u)", + INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR); - return __bch2_inode_invalid(k, err); + ret = __bch2_inode_invalid(c, k, err); +fsck_err: + return ret; } -int bch2_inode_v3_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_inode_v3_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k); + int ret = 0; - if (INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL || - INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k)) { - prt_printf(err, "invalid fields_start (got %llu, min %u max %zu)", - INODEv3_FIELDS_START(inode.v), - INODEv3_FIELDS_START_INITIAL, - bkey_val_u64s(inode.k)); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL || + INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k), c, err, + inode_v3_fields_start_bad, + "invalid fields_start (got %llu, min %u max %zu)", + INODEv3_FIELDS_START(inode.v), + INODEv3_FIELDS_START_INITIAL, + bkey_val_u64s(inode.k)); - if (INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR) { - prt_printf(err, "invalid str hash type (%llu >= %u)", - INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, + inode_str_hash_invalid, + "invalid str hash type (%llu >= %u)", + INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR); - return __bch2_inode_invalid(k, err); + ret = __bch2_inode_invalid(c, k, err); +fsck_err: + return ret; } static void __bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode) { - prt_printf(out, "mode %o flags %x journal_seq %llu bi_size %llu bi_sectors %llu bi_version %llu", - inode->bi_mode, inode->bi_flags, + prt_printf(out, "mode=%o ", inode->bi_mode); + + prt_str(out, "flags="); + prt_bitflags(out, bch2_inode_flag_strs, inode->bi_flags & ((1U << 20) - 1)); + prt_printf(out, " (%x)", inode->bi_flags); + + prt_printf(out, " journal_seq=%llu bi_size=%llu bi_sectors=%llu bi_version=%llu", inode->bi_journal_seq, inode->bi_size, inode->bi_sectors, inode->bi_version); #define x(_name, _bits) \ - 
prt_printf(out, " "#_name " %llu", (u64) inode->_name); + prt_printf(out, " "#_name "=%llu", (u64) inode->_name); BCH_INODE_FIELDS_v3() #undef x } @@ -546,7 +557,7 @@ static inline u64 bkey_inode_flags(struct bkey_s_c k) static inline bool bkey_is_deleted_inode(struct bkey_s_c k) { - return bkey_inode_flags(k) & BCH_INODE_UNLINKED; + return bkey_inode_flags(k) & BCH_INODE_unlinked; } int bch2_trans_mark_inode(struct btree_trans *trans, @@ -610,16 +621,17 @@ int bch2_mark_inode(struct btree_trans *trans, return 0; } -int bch2_inode_generation_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_inode_generation_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (k.k->p.inode) { - prt_printf(err, "nonzero k.p.inode"); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; - return 0; + bkey_fsck_err_on(k.k->p.inode, c, err, + inode_pos_inode_nonzero, + "nonzero k.p.inode"); +fsck_err: + return ret; } void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c, @@ -926,8 +938,8 @@ int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum, int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi) { - if (bi->bi_flags & BCH_INODE_UNLINKED) - bi->bi_flags &= ~BCH_INODE_UNLINKED; + if (bi->bi_flags & BCH_INODE_unlinked) + bi->bi_flags &= ~BCH_INODE_unlinked; else { if (bi->bi_nlink == U32_MAX) return -EINVAL; @@ -940,13 +952,13 @@ int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi) void bch2_inode_nlink_dec(struct btree_trans *trans, struct bch_inode_unpacked *bi) { - if (bi->bi_nlink && (bi->bi_flags & BCH_INODE_UNLINKED)) { + if (bi->bi_nlink && (bi->bi_flags & BCH_INODE_unlinked)) { bch2_trans_inconsistent(trans, "inode %llu unlinked but link count nonzero", bi->bi_inum); return; } - if (bi->bi_flags & BCH_INODE_UNLINKED) { + if (bi->bi_flags & BCH_INODE_unlinked) { bch2_trans_inconsistent(trans, "inode %llu link count underflow", bi->bi_inum); return; } @@ -954,7 +966,7 @@ void bch2_inode_nlink_dec(struct btree_trans *trans, struct bch_inode_unpacked * if (bi->bi_nlink) bi->bi_nlink--; else - bi->bi_flags |= BCH_INODE_UNLINKED; + bi->bi_flags |= BCH_INODE_unlinked; } struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *inode) @@ -979,6 +991,18 @@ void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c, opts->compression = opts->background_compression = opts->data_checksum = opts->erasure_code = 0; } +int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_io_opts *opts) +{ + struct bch_inode_unpacked inode; + int ret = lockrestart_do(trans, bch2_inode_find_by_inum_trans(trans, inum, &inode)); + + if (ret) + return ret; + + bch2_inode_opts_get(opts, trans->c, &inode); + return 0; +} + int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) { struct bch_fs *c = trans->c; @@ -1042,53 +1066,85 @@ err: return ret ?: -BCH_ERR_transaction_restart_nested; } -static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos) +static int may_delete_deleted_inode(struct btree_trans *trans, + struct btree_iter *iter, + struct bpos pos, + bool *need_another_pass) { struct bch_fs *c = trans->c; - struct btree_iter iter; + struct btree_iter inode_iter; struct bkey_s_c k; struct bch_inode_unpacked inode; int ret; - if (bch2_snapshot_is_internal_node(c, pos.snapshot)) - return 0; - - if (!fsck_err_on(c->sb.clean, c, - "filesystem marked as clean but have deleted inode %llu:%u", - pos.offset, pos.snapshot)) - return 0; - - k = 
bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, pos, BTREE_ITER_CACHED); + k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, pos, BTREE_ITER_CACHED); ret = bkey_err(k); if (ret) return ret; ret = bkey_is_inode(k.k) ? 0 : -BCH_ERR_ENOENT_inode; if (fsck_err_on(!bkey_is_inode(k.k), c, + deleted_inode_missing, "nonexistent inode %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) goto delete; ret = bch2_inode_unpack(k, &inode); if (ret) - goto err; + goto out; if (fsck_err_on(S_ISDIR(inode.bi_mode), c, + deleted_inode_is_dir, "directory %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) goto delete; - if (fsck_err_on(!(inode.bi_flags & BCH_INODE_UNLINKED), c, + if (fsck_err_on(!(inode.bi_flags & BCH_INODE_unlinked), c, + deleted_inode_not_unlinked, "non-deleted inode %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) goto delete; - return 1; -err: + if (c->sb.clean && + !fsck_err(c, + deleted_inode_but_clean, + "filesystem marked as clean but have deleted inode %llu:%u", + pos.offset, pos.snapshot)) { + ret = 0; + goto out; + } + + if (bch2_snapshot_is_internal_node(c, pos.snapshot)) { + struct bpos new_min_pos; + + ret = bch2_propagate_key_to_snapshot_leaves(trans, inode_iter.btree_id, k, &new_min_pos); + if (ret) + goto out; + + inode.bi_flags &= ~BCH_INODE_unlinked; + + ret = bch2_inode_write_flags(trans, &inode_iter, &inode, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); + bch_err_msg(c, ret, "clearing inode unlinked flag"); + if (ret) + goto out; + + /* + * We'll need another write buffer flush to pick up the new + * unlinked inodes in the snapshot leaves: + */ + *need_another_pass = true; + return 0; + } + + ret = 1; +out: fsck_err: + bch2_trans_iter_exit(trans, &inode_iter); return ret; delete: - return bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, pos, false); + ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, pos, false); + goto out; } int bch2_delete_dead_inodes(struct bch_fs *c) @@ -1096,7 +1152,10 @@ int bch2_delete_dead_inodes(struct bch_fs *c) struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; + bool need_another_pass; int ret; +again: + need_another_pass = false; ret = bch2_btree_write_buffer_flush_sync(trans); if (ret) @@ -1110,7 +1169,8 @@ int bch2_delete_dead_inodes(struct bch_fs *c) */ for_each_btree_key(trans, iter, BTREE_ID_deleted_inodes, POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) { - ret = lockrestart_do(trans, may_delete_deleted_inode(trans, k.k->p)); + ret = lockrestart_do(trans, may_delete_deleted_inode(trans, &iter, k.k->p, + &need_another_pass)); if (ret < 0) break; @@ -1120,12 +1180,17 @@ int bch2_delete_dead_inodes(struct bch_fs *c) bch2_fs_lazy_rw(c); } + bch_verbose(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot); + ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot); if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) break; } } bch2_trans_iter_exit(trans, &iter); + + if (!ret && need_another_pass) + goto again; err: bch2_trans_put(trans); diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index a7464e1b6960..88818a332b1e 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -3,16 +3,17 @@ #define _BCACHEFS_INODE_H #include "bkey.h" +#include "bkey_methods.h" #include "opts.h" enum bkey_invalid_flags; extern const char * const bch2_inode_opts[]; -int bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_inode_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, 
struct printbuf *); -int bch2_inode_v2_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_inode_v2_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); -int bch2_inode_v3_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_inode_v3_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -52,7 +53,7 @@ static inline bool bkey_is_inode(const struct bkey *k) k->type == KEY_TYPE_inode_v3; } -int bch2_inode_generation_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_inode_generation_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -101,8 +102,16 @@ void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *) int bch2_inode_peek(struct btree_trans *, struct btree_iter *, struct bch_inode_unpacked *, subvol_inum, unsigned); -int bch2_inode_write(struct btree_trans *, struct btree_iter *, - struct bch_inode_unpacked *); + +int bch2_inode_write_flags(struct btree_trans *, struct btree_iter *, + struct bch_inode_unpacked *, enum btree_update_flags); + +static inline int bch2_inode_write(struct btree_trans *trans, + struct btree_iter *iter, + struct bch_inode_unpacked *inode) +{ + return bch2_inode_write_flags(trans, iter, inode, 0); +} void bch2_inode_init_early(struct bch_fs *, struct bch_inode_unpacked *); @@ -177,7 +186,7 @@ static inline unsigned nlink_bias(umode_t mode) static inline unsigned bch2_inode_nlink_get(struct bch_inode_unpacked *bi) { - return bi->bi_flags & BCH_INODE_UNLINKED + return bi->bi_flags & BCH_INODE_unlinked ? 0 : bi->bi_nlink + nlink_bias(bi->bi_mode); } @@ -187,10 +196,10 @@ static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi, { if (nlink) { bi->bi_nlink = nlink - nlink_bias(bi->bi_mode); - bi->bi_flags &= ~BCH_INODE_UNLINKED; + bi->bi_flags &= ~BCH_INODE_unlinked; } else { bi->bi_nlink = 0; - bi->bi_flags |= BCH_INODE_UNLINKED; + bi->bi_flags |= BCH_INODE_unlinked; } } @@ -200,6 +209,7 @@ void bch2_inode_nlink_dec(struct btree_trans *, struct bch_inode_unpacked *); struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *); void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *, struct bch_inode_unpacked *); +int bch2_inum_opts_get(struct btree_trans*, subvol_inum, struct bch_io_opts *); int bch2_inode_rm_snapshot(struct btree_trans *, u64, u32); int bch2_delete_dead_inodes(struct bch_fs *); diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 119834cb8f9e..bebc11444ef5 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -16,13 +16,14 @@ #include "io_misc.h" #include "io_write.h" #include "logged_ops.h" +#include "rebalance.h" #include "subvolume.h" /* Overwrites whatever was present with zeroes: */ int bch2_extent_fallocate(struct btree_trans *trans, subvol_inum inum, struct btree_iter *iter, - unsigned sectors, + u64 sectors, struct bch_io_opts opts, s64 *i_sectors_delta, struct write_point_specifier write_point) @@ -104,7 +105,7 @@ int bch2_extent_fallocate(struct btree_trans *trans, if (ret) goto err; - sectors = min(sectors, wp->sectors_free); + sectors = min_t(u64, sectors, wp->sectors_free); sectors_allocated = sectors; bch2_key_resize(&e->k, sectors); @@ -355,6 +356,7 @@ static int __bch2_resume_logged_op_finsert(struct btree_trans *trans, struct btree_iter iter; struct 
bkey_i_logged_op_finsert *op = bkey_i_to_logged_op_finsert(op_k); subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) }; + struct bch_io_opts opts; u64 dst_offset = le64_to_cpu(op->v.dst_offset); u64 src_offset = le64_to_cpu(op->v.src_offset); s64 shift = dst_offset - src_offset; @@ -363,6 +365,10 @@ static int __bch2_resume_logged_op_finsert(struct btree_trans *trans, bool insert = shift > 0; int ret = 0; + ret = bch2_inum_opts_get(trans, inum, &opts); + if (ret) + return ret; + bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, POS(inum.inum, 0), BTREE_ITER_INTENT); @@ -443,7 +449,10 @@ case LOGGED_OP_FINSERT_shift_extents: op->v.pos = cpu_to_le64(insert ? bkey_start_offset(&delete.k) : delete.k.p.offset); - ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?: + ret = bch2_bkey_set_needs_rebalance(c, copy, + opts.background_target, + opts.background_compression) ?: + bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?: bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?: bch2_logged_op_update(trans, &op->k_i) ?: bch2_trans_commit(trans, &disk_res, NULL, BTREE_INSERT_NOFAIL); diff --git a/fs/bcachefs/io_misc.h b/fs/bcachefs/io_misc.h index c9e6ed40e1b8..9cb44a7c43c1 100644 --- a/fs/bcachefs/io_misc.h +++ b/fs/bcachefs/io_misc.h @@ -3,7 +3,7 @@ #define _BCACHEFS_IO_MISC_H int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *, - unsigned, struct bch_io_opts, s64 *, + u64, struct bch_io_opts, s64 *, struct write_point_specifier); int bch2_fpunch_at(struct btree_trans *, struct btree_iter *, subvol_inum, u64, s64 *); diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 443c3ea65527..a56ed553dc15 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -643,7 +643,7 @@ csum_err: "data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)", rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo, csum.hi, csum.lo, bch2_csum_types[crc.csum_type]); - bch2_io_error(ca); + bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); goto out; decompression_err: @@ -677,7 +677,7 @@ static void bch2_read_endio(struct bio *bio) if (!rbio->split) rbio->bio.bi_end_io = rbio->end_io; - if (bch2_dev_inum_io_err_on(bio->bi_status, ca, + if (bch2_dev_inum_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read, rbio->read_pos.inode, rbio->read_pos.offset, "data read error: %s", @@ -1025,7 +1025,7 @@ get_bio: trans->notrace_relock_fail = true; } else { /* Attempting reconstruct read: */ - if (bch2_ec_read_extent(c, rbio)) { + if (bch2_ec_read_extent(trans, rbio)) { bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); goto out; } diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 6e4f85eb6ec8..f02b3f7d26a0 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -202,6 +202,17 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, struct btree_iter iter; struct bkey_i *k; struct bkey_i_inode_v3 *inode; + /* + * Crazy performance optimization: + * Every extent update needs to also update the inode: the inode trigger + * will set bi->journal_seq to the journal sequence number of this + * transaction - for fsync. + * + * But if that's the only reason we're updating the inode (we're not + * updating bi_size or bi_sectors), then we don't need the inode update + * to be journalled - if we crash, the bi_journal_seq update will be + * lost, but that's fine. 
+ */ unsigned inode_update_flags = BTREE_UPDATE_NOJOURNAL; int ret; @@ -223,7 +234,7 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, inode = bkey_i_to_inode_v3(k); - if (!(le64_to_cpu(inode->v.bi_flags) & BCH_INODE_I_SIZE_DIRTY) && + if (!(le64_to_cpu(inode->v.bi_flags) & BCH_INODE_i_size_dirty) && new_i_size > le64_to_cpu(inode->v.bi_size)) { inode->v.bi_size = cpu_to_le64(new_i_size); inode_update_flags = 0; @@ -351,10 +362,13 @@ static int bch2_write_index_default(struct bch_write_op *op) bkey_start_pos(&sk.k->k), BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - ret = bch2_extent_update(trans, inum, &iter, sk.k, - &op->res, - op->new_i_size, &op->i_sectors_delta, - op->flags & BCH_WRITE_CHECK_ENOSPC); + ret = bch2_bkey_set_needs_rebalance(c, sk.k, + op->opts.background_target, + op->opts.background_compression) ?: + bch2_extent_update(trans, inum, &iter, sk.k, + &op->res, + op->new_i_size, &op->i_sectors_delta, + op->flags & BCH_WRITE_CHECK_ENOSPC); bch2_trans_iter_exit(trans, &iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) @@ -495,7 +509,6 @@ static void __bch2_write_index(struct bch_write_op *op) { struct bch_fs *c = op->c; struct keylist *keys = &op->insert_keys; - struct bkey_i *k; unsigned dev; int ret = 0; @@ -505,14 +518,6 @@ static void __bch2_write_index(struct bch_write_op *op) goto err; } - /* - * probably not the ideal place to hook this in, but I don't - * particularly want to plumb io_opts all the way through the btree - * update stack right now - */ - for_each_keylist_key(keys, k) - bch2_rebalance_add_key(c, bkey_i_to_s_c(k), &op->opts); - if (!bch2_keylist_empty(keys)) { u64 sectors_start = keylist_sectors(keys); @@ -643,7 +648,7 @@ static void bch2_write_endio(struct bio *bio) struct bch_fs *c = wbio->c; struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev); - if (bch2_dev_inum_io_err_on(bio->bi_status, ca, + if (bch2_dev_inum_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write, op->pos.inode, wbio->inode_offset << 9, "data write error: %s", @@ -816,6 +821,7 @@ static enum prep_encoded_ret { /* Can we just write the entire extent as is? 
*/ if (op->crc.uncompressed_size == op->crc.live_size && + op->crc.uncompressed_size <= c->opts.encoded_extent_max >> 9 && op->crc.compressed_size <= wp->sectors_free && (op->crc.compression_type == bch2_compression_opt_to_type(op->compression_opt) || op->incompressible)) { @@ -1091,9 +1097,7 @@ static bool bch2_extent_is_writeable(struct bch_write_op *op, e = bkey_s_c_to_extent(k); extent_for_each_ptr_decode(e, p, entry) { - if (p.crc.csum_type || - crc_is_compressed(p.crc) || - p.has_ec) + if (crc_is_encoded(p.crc) || p.has_ec) return false; replicas += bch2_extent_ptr_durability(c, &p); diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 0e7a9ffa3671..5b5d69f2316b 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1019,6 +1019,25 @@ err: return ret; } +int bch2_fs_journal_alloc(struct bch_fs *c) +{ + struct bch_dev *ca; + unsigned i; + + for_each_online_member(ca, c, i) { + if (ca->journal.nr) + continue; + + int ret = bch2_dev_journal_alloc(ca); + if (ret) { + percpu_ref_put(&ca->io_ref); + return ret; + } + } + + return 0; +} + /* startup/shutdown: */ static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx) diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index 491133cc52f3..011711e99c8d 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -534,6 +534,7 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *); int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *, unsigned nr); int bch2_dev_journal_alloc(struct bch_dev *); +int bch2_fs_journal_alloc(struct bch_fs *); void bch2_dev_journal_stop(struct journal *, struct bch_dev *); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 6a3d6a374e9c..f4bc2cdbfdd7 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -140,7 +140,8 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, if (!dup->csum_good) goto replace; - fsck_err(c, "found duplicate but non identical journal entries (seq %llu)", + fsck_err(c, journal_entry_replicas_data_mismatch, + "found duplicate but non identical journal entries (seq %llu)", le64_to_cpu(j->seq)); i = dup; goto found; @@ -235,7 +236,7 @@ static void journal_entry_err_msg(struct printbuf *out, prt_str(out, ": "); } -#define journal_entry_err(c, version, jset, entry, msg, ...) \ +#define journal_entry_err(c, version, jset, entry, _err, msg, ...) \ ({ \ struct printbuf _buf = PRINTBUF; \ \ @@ -244,9 +245,10 @@ static void journal_entry_err_msg(struct printbuf *out, \ switch (flags & BKEY_INVALID_WRITE) { \ case READ: \ - mustfix_fsck_err(c, "%s", _buf.buf); \ + mustfix_fsck_err(c, _err, "%s", _buf.buf); \ break; \ case WRITE: \ + bch2_sb_error_count(c, BCH_FSCK_ERR_##_err); \ bch_err(c, "corrupt metadata before write: %s\n", _buf.buf);\ if (bch2_fs_inconsistent(c)) { \ ret = -BCH_ERR_fsck_errors_not_fixed; \ @@ -259,8 +261,8 @@ static void journal_entry_err_msg(struct printbuf *out, true; \ }) -#define journal_entry_err_on(cond, c, version, jset, entry, msg, ...) \ - ((cond) ? journal_entry_err(c, version, jset, entry, msg, ##__VA_ARGS__) : false) +#define journal_entry_err_on(cond, ...) \ + ((cond) ? 
journal_entry_err(__VA_ARGS__) : false) #define FSCK_DELETED_KEY 5 @@ -277,7 +279,10 @@ static int journal_validate_key(struct bch_fs *c, struct printbuf buf = PRINTBUF; int ret = 0; - if (journal_entry_err_on(!k->k.u64s, c, version, jset, entry, "k->u64s 0")) { + if (journal_entry_err_on(!k->k.u64s, + c, version, jset, entry, + journal_entry_bkey_u64s_0, + "k->u64s 0")) { entry->u64s = cpu_to_le16((u64 *) k - entry->_data); journal_entry_null_range(vstruct_next(entry), next); return FSCK_DELETED_KEY; @@ -286,6 +291,7 @@ static int journal_validate_key(struct bch_fs *c, if (journal_entry_err_on((void *) bkey_next(k) > (void *) vstruct_next(entry), c, version, jset, entry, + journal_entry_bkey_past_end, "extends past end of journal entry")) { entry->u64s = cpu_to_le16((u64 *) k - entry->_data); journal_entry_null_range(vstruct_next(entry), next); @@ -294,6 +300,7 @@ static int journal_validate_key(struct bch_fs *c, if (journal_entry_err_on(k->k.format != KEY_FORMAT_CURRENT, c, version, jset, entry, + journal_entry_bkey_bad_format, "bad format %u", k->k.format)) { le16_add_cpu(&entry->u64s, -((u16) k->k.u64s)); memmove(k, bkey_next(k), next - (void *) bkey_next(k)); @@ -317,7 +324,8 @@ static int journal_validate_key(struct bch_fs *c, bch2_bkey_invalid(c, bkey_i_to_s_c(k), __btree_node_type(level, btree_id), write, &buf); - mustfix_fsck_err(c, "%s", buf.buf); + mustfix_fsck_err(c, journal_entry_bkey_invalid, + "%s", buf.buf); le16_add_cpu(&entry->u64s, -((u16) k->k.u64s)); memmove(k, bkey_next(k), next - (void *) bkey_next(k)); @@ -369,7 +377,7 @@ static void journal_entry_btree_keys_to_text(struct printbuf *out, struct bch_fs prt_newline(out); prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]); } - prt_printf(out, "btree=%s l=%u ", bch2_btree_ids[entry->btree_id], entry->level); + prt_printf(out, "btree=%s l=%u ", bch2_btree_id_str(entry->btree_id), entry->level); bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k)); first = false; } @@ -387,6 +395,7 @@ static int journal_entry_btree_root_validate(struct bch_fs *c, if (journal_entry_err_on(!entry->u64s || le16_to_cpu(entry->u64s) != k->k.u64s, c, version, jset, entry, + journal_entry_btree_root_bad_size, "invalid btree root journal entry: wrong number of keys")) { void *next = vstruct_next(entry); /* @@ -436,6 +445,7 @@ static int journal_entry_blacklist_validate(struct bch_fs *c, if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 1, c, version, jset, entry, + journal_entry_blacklist_bad_size, "invalid journal seq blacklist entry: bad size")) { journal_entry_null_range(entry, vstruct_next(entry)); } @@ -463,6 +473,7 @@ static int journal_entry_blacklist_v2_validate(struct bch_fs *c, if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 2, c, version, jset, entry, + journal_entry_blacklist_v2_bad_size, "invalid journal seq blacklist entry: bad size")) { journal_entry_null_range(entry, vstruct_next(entry)); goto out; @@ -473,6 +484,7 @@ static int journal_entry_blacklist_v2_validate(struct bch_fs *c, if (journal_entry_err_on(le64_to_cpu(bl_entry->start) > le64_to_cpu(bl_entry->end), c, version, jset, entry, + journal_entry_blacklist_v2_start_past_end, "invalid journal seq blacklist entry: start > end")) { journal_entry_null_range(entry, vstruct_next(entry)); } @@ -505,6 +517,7 @@ static int journal_entry_usage_validate(struct bch_fs *c, if (journal_entry_err_on(bytes < sizeof(*u), c, version, jset, entry, + journal_entry_usage_bad_size, "invalid journal entry usage: bad size")) { journal_entry_null_range(entry, vstruct_next(entry)); 
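[Editorial aside: the journal-entry validators in this hunk all gain a named error argument: on the read path journal_entry_err() feeds it to mustfix_fsck_err(c, _err, ...), and on the write path it bumps a per-error superblock counter via bch2_sb_error_count(c, BCH_FSCK_ERR_##_err). A minimal, self-contained sketch of that token-pasting pattern follows; FSCK_ERRS(), validate_err_on() and the counter array are illustrative stand-ins, not bcachefs API.

	#include <stdio.h>

	/* X-macro list of named errors; token pasting gives each a stable enum ID */
	#define FSCK_ERRS()				\
		x(journal_entry_bkey_u64s_0)		\
		x(journal_entry_bkey_past_end)

	enum fsck_err_id {
	#define x(n) FSCK_ERR_##n,
		FSCK_ERRS()
	#undef x
		FSCK_ERR_NR,
	};

	static unsigned err_counts[FSCK_ERR_NR];

	/* Like journal_entry_err_on(): count the named error, log it, return cond */
	#define validate_err_on(cond, _err, fmt, ...)				\
		((cond)								\
		 ? (err_counts[FSCK_ERR_##_err]++,				\
		    fprintf(stderr, "%s: " fmt "\n", #_err, ##__VA_ARGS__), 1)	\
		 : 0)

	int main(void)
	{
		unsigned u64s = 0;

		if (validate_err_on(!u64s, journal_entry_bkey_u64s_0, "k->u64s %u", u64s))
			return 1;	/* a real caller would repair/drop the key here */
		return 0;
	}

End of aside; the patch continues below.]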
return ret; @@ -539,6 +552,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c, if (journal_entry_err_on(bytes < sizeof(*u) || bytes < sizeof(*u) + u->r.nr_devs, c, version, jset, entry, + journal_entry_data_usage_bad_size, "invalid journal entry usage: bad size")) { journal_entry_null_range(entry, vstruct_next(entry)); return ret; @@ -570,13 +584,17 @@ static int journal_entry_clock_validate(struct bch_fs *c, int ret = 0; if (journal_entry_err_on(bytes != sizeof(*clock), - c, version, jset, entry, "bad size")) { + c, version, jset, entry, + journal_entry_clock_bad_size, + "bad size")) { journal_entry_null_range(entry, vstruct_next(entry)); return ret; } if (journal_entry_err_on(clock->rw > 1, - c, version, jset, entry, "bad rw")) { + c, version, jset, entry, + journal_entry_clock_bad_rw, + "bad rw")) { journal_entry_null_range(entry, vstruct_next(entry)); return ret; } @@ -608,7 +626,9 @@ static int journal_entry_dev_usage_validate(struct bch_fs *c, int ret = 0; if (journal_entry_err_on(bytes < expected, - c, version, jset, entry, "bad size (%u < %u)", + c, version, jset, entry, + journal_entry_dev_usage_bad_size, + "bad size (%u < %u)", bytes, expected)) { journal_entry_null_range(entry, vstruct_next(entry)); return ret; @@ -617,13 +637,17 @@ static int journal_entry_dev_usage_validate(struct bch_fs *c, dev = le32_to_cpu(u->dev); if (journal_entry_err_on(!bch2_dev_exists2(c, dev), - c, version, jset, entry, "bad dev")) { + c, version, jset, entry, + journal_entry_dev_usage_bad_dev, + "bad dev")) { journal_entry_null_range(entry, vstruct_next(entry)); return ret; } if (journal_entry_err_on(u->pad, - c, version, jset, entry, "bad pad")) { + c, version, jset, entry, + journal_entry_dev_usage_bad_pad, + "bad pad")) { journal_entry_null_range(entry, vstruct_next(entry)); return ret; } @@ -738,7 +762,8 @@ static int jset_validate_entries(struct bch_fs *c, struct jset *jset, vstruct_for_each(jset, entry) { if (journal_entry_err_on(vstruct_next(entry) > vstruct_last(jset), - c, version, jset, entry, + c, version, jset, entry, + journal_entry_past_jset_end, "journal entry extends past end of jset")) { jset->u64s = cpu_to_le32((u64 *) entry - jset->_data); break; @@ -767,6 +792,7 @@ static int jset_validate(struct bch_fs *c, version = le32_to_cpu(jset->version); if (journal_entry_err_on(!bch2_version_compatible(version), c, version, jset, NULL, + jset_unsupported_version, "%s sector %llu seq %llu: incompatible journal entry version %u.%u", ca ? ca->name : c->name, sector, le64_to_cpu(jset->seq), @@ -777,7 +803,8 @@ static int jset_validate(struct bch_fs *c, } if (journal_entry_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(jset)), - c, version, jset, NULL, + c, version, jset, NULL, + jset_unknown_csum, "%s sector %llu seq %llu: journal entry with unknown csum type %llu", ca ? ca->name : c->name, sector, le64_to_cpu(jset->seq), @@ -788,6 +815,7 @@ static int jset_validate(struct bch_fs *c, if (journal_entry_err_on(!JSET_NO_FLUSH(jset) && le64_to_cpu(jset->last_seq) > le64_to_cpu(jset->seq), c, version, jset, NULL, + jset_last_seq_newer_than_seq, "invalid journal entry: last_seq > seq (%llu > %llu)", le64_to_cpu(jset->last_seq), le64_to_cpu(jset->seq))) { @@ -816,7 +844,8 @@ static int jset_validate_early(struct bch_fs *c, version = le32_to_cpu(jset->version); if (journal_entry_err_on(!bch2_version_compatible(version), - c, version, jset, NULL, + c, version, jset, NULL, + jset_unsupported_version, "%s sector %llu seq %llu: unknown journal entry version %u.%u", ca ? 
ca->name : c->name, sector, le64_to_cpu(jset->seq), @@ -831,7 +860,8 @@ static int jset_validate_early(struct bch_fs *c, return JOURNAL_ENTRY_REREAD; if (journal_entry_err_on(bytes > bucket_sectors_left << 9, - c, version, jset, NULL, + c, version, jset, NULL, + jset_past_bucket_end, "%s sector %llu seq %llu: journal entry too big (%zu bytes)", ca ? ca->name : c->name, sector, le64_to_cpu(jset->seq), bytes)) @@ -900,7 +930,7 @@ reread: ret = submit_bio_wait(bio); kfree(bio); - if (bch2_dev_io_err_on(ret, ca, + if (bch2_dev_io_err_on(ret, ca, BCH_MEMBER_ERROR_read, "journal read error: sector %llu", offset) || bch2_meta_read_fault("journal")) { @@ -956,7 +986,8 @@ reread: ja->bucket_seq[bucket] = le64_to_cpu(j->seq); csum_good = jset_csum_good(c, j); - if (!csum_good) + if (bch2_dev_io_err_on(!csum_good, ca, BCH_MEMBER_ERROR_checksum, + "journal checksum error")) saw_bad = true; ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j), @@ -1172,6 +1203,7 @@ int bch2_journal_read(struct bch_fs *c, if (journal_entry_err_on(le64_to_cpu(i->j.last_seq) > le64_to_cpu(i->j.seq), c, le32_to_cpu(i->j.version), &i->j, NULL, + jset_last_seq_newer_than_seq, "invalid journal entry: last_seq > seq (%llu > %llu)", le64_to_cpu(i->j.last_seq), le64_to_cpu(i->j.seq))) @@ -1188,7 +1220,8 @@ int bch2_journal_read(struct bch_fs *c, } if (!*last_seq) { - fsck_err(c, "journal read done, but no entries found after dropping non-flushes"); + fsck_err(c, dirty_but_no_journal_entries_post_drop_nonflushes, + "journal read done, but no entries found after dropping non-flushes"); return 0; } @@ -1214,6 +1247,7 @@ int bch2_journal_read(struct bch_fs *c, if (bch2_journal_seq_is_blacklisted(c, seq, true)) { fsck_err_on(!JSET_NO_FLUSH(&i->j), c, + jset_seq_blacklisted, "found blacklisted journal entry %llu", seq); i->ignore = true; } @@ -1254,7 +1288,8 @@ int bch2_journal_read(struct bch_fs *c, bch2_journal_ptrs_to_text(&buf2, c, i); missing_end = seq - 1; - fsck_err(c, "journal entries %llu-%llu missing! (replaying %llu-%llu)\n" + fsck_err(c, journal_entries_missing, + "journal entries %llu-%llu missing! 
(replaying %llu-%llu)\n" " prev at %s\n" " next at %s", missing_start, missing_end, @@ -1309,7 +1344,8 @@ int bch2_journal_read(struct bch_fs *c, if (!degraded && !bch2_replicas_marked(c, &replicas.e) && (le64_to_cpu(i->j.seq) == *last_seq || - fsck_err(c, "superblock not marked as containing replicas for journal entry %llu\n %s", + fsck_err(c, journal_entry_replicas_not_marked, + "superblock not marked as containing replicas for journal entry %llu\n %s", le64_to_cpu(i->j.seq), buf.buf))) { ret = bch2_mark_replicas(c, &replicas.e); if (ret) @@ -1581,7 +1617,8 @@ static void journal_write_endio(struct bio *bio) struct journal_buf *w = journal_last_unwritten_buf(j); unsigned long flags; - if (bch2_dev_io_err_on(bio->bi_status, ca, "error writing journal entry %llu: %s", + if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write, + "error writing journal entry %llu: %s", le64_to_cpu(w->data->seq), bch2_blk_status_to_str(bio->bi_status)) || bch2_meta_write_fault("journal")) { @@ -1641,9 +1678,15 @@ static void do_journal_write(struct closure *cl) continue_at(cl, journal_write_done, c->io_complete_wq); } -static void bch2_journal_entries_postprocess(struct bch_fs *c, struct jset *jset) +static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) { - struct jset_entry *i, *next, *prev = NULL; + struct bch_fs *c = container_of(j, struct bch_fs, journal); + struct jset_entry *start, *end, *i, *next, *prev = NULL; + struct jset *jset = w->data; + unsigned sectors, bytes, u64s; + bool validate_before_checksum = false; + unsigned long btree_roots_have = 0; + int ret; /* * Simple compaction, dropping empty jset_entries (from journal @@ -1660,8 +1703,20 @@ static void bch2_journal_entries_postprocess(struct bch_fs *c, struct jset *jset if (!u64s) continue; - if (i->type == BCH_JSET_ENTRY_btree_root) + /* + * New btree roots are set by journalling them; when the journal + * entry gets written we have to propagate them to + * c->btree_roots + * + * But, every journal entry we write has to contain all the + * btree roots (at least for now); so after we copy btree roots + * to c->btree_roots we have to get any missing btree roots and + * add them to this journal entry: + */ + if (i->type == BCH_JSET_ENTRY_btree_root) { bch2_journal_entry_to_btree_root(c, i); + __set_bit(i->btree_id, &btree_roots_have); + } /* Can we merge with previous entry? */ if (prev && @@ -1685,85 +1740,10 @@ static void bch2_journal_entries_postprocess(struct bch_fs *c, struct jset *jset prev = prev ? vstruct_next(prev) : jset->start; jset->u64s = cpu_to_le32((u64 *) prev - jset->_data); -} - -void bch2_journal_write(struct closure *cl) -{ - struct journal *j = container_of(cl, struct journal, io); - struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct bch_dev *ca; - struct journal_buf *w = journal_last_unwritten_buf(j); - struct bch_replicas_padded replicas; - struct jset_entry *start, *end; - struct jset *jset; - struct bio *bio; - struct printbuf journal_debug_buf = PRINTBUF; - bool validate_before_checksum = false; - unsigned i, sectors, bytes, u64s, nr_rw_members = 0; - int ret; - - BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb)); - - journal_buf_realloc(j, w); - jset = w->data; - - j->write_start_time = local_clock(); - - spin_lock(&j->lock); - - /* - * If the journal is in an error state - we did an emergency shutdown - - * we prefer to continue doing journal writes. 
We just mark them as - * noflush so they'll never be used, but they'll still be visible by the - * list_journal tool - this helps in debugging. - * - * There's a caveat: the first journal write after marking the - * superblock dirty must always be a flush write, because on startup - * from a clean shutdown we didn't necessarily read the journal and the - * new journal write might overwrite whatever was in the journal - * previously - we can't leave the journal without any flush writes in - * it. - * - * So if we're in an error state, and we're still starting up, we don't - * write anything at all. - */ - if (!test_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags) && - (bch2_journal_error(j) || - w->noflush || - (!w->must_flush && - (jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) && - test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags)))) { - w->noflush = true; - SET_JSET_NO_FLUSH(jset, true); - jset->last_seq = 0; - w->last_seq = 0; - - j->nr_noflush_writes++; - } else if (!bch2_journal_error(j)) { - j->last_flush_write = jiffies; - j->nr_flush_writes++; - clear_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags); - } else { - spin_unlock(&j->lock); - goto err; - } - spin_unlock(&j->lock); - - /* - * New btree roots are set by journalling them; when the journal entry - * gets written we have to propagate them to c->btree_roots - * - * But, every journal entry we write has to contain all the btree roots - * (at least for now); so after we copy btree roots to c->btree_roots we - * have to get any missing btree roots and add them to this journal - * entry: - */ - - bch2_journal_entries_postprocess(c, jset); start = end = vstruct_last(jset); - end = bch2_btree_roots_to_journal_entries(c, jset->start, end); + end = bch2_btree_roots_to_journal_entries(c, end, btree_roots_have); bch2_journal_super_entries_add_common(c, &end, le64_to_cpu(jset->seq)); @@ -1779,7 +1759,7 @@ void bch2_journal_write(struct closure *cl) bch2_fs_fatal_error(c, "aieeee! journal write overran available space, %zu > %u (extra %u reserved %u/%u)", vstruct_bytes(jset), w->sectors << 9, u64s, w->u64s_reserved, j->entry_u64s_reserved); - goto err; + return -EINVAL; } jset->magic = cpu_to_le64(jset_magic(c)); @@ -1798,37 +1778,117 @@ void bch2_journal_write(struct closure *cl) validate_before_checksum = true; if (validate_before_checksum && - jset_validate(c, NULL, jset, 0, WRITE)) - goto err; + (ret = jset_validate(c, NULL, jset, 0, WRITE))) + return ret; ret = bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset->encrypted_start, vstruct_end(jset) - (void *) jset->encrypted_start); if (bch2_fs_fatal_err_on(ret, c, "error decrypting journal entry: %i", ret)) - goto err; + return ret; jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset); if (!validate_before_checksum && - jset_validate(c, NULL, jset, 0, WRITE)) - goto err; + (ret = jset_validate(c, NULL, jset, 0, WRITE))) + return ret; memset((void *) jset + bytes, 0, (sectors << 9) - bytes); + return 0; +} + +static int bch2_journal_write_pick_flush(struct journal *j, struct journal_buf *w) +{ + struct bch_fs *c = container_of(j, struct bch_fs, journal); + int error = bch2_journal_error(j); + + /* + * If the journal is in an error state - we did an emergency shutdown - + * we prefer to continue doing journal writes. We just mark them as + * noflush so they'll never be used, but they'll still be visible by the + * list_journal tool - this helps in debugging. 
+ * + * There's a caveat: the first journal write after marking the + * superblock dirty must always be a flush write, because on startup + * from a clean shutdown we didn't necessarily read the journal and the + * new journal write might overwrite whatever was in the journal + * previously - we can't leave the journal without any flush writes in + * it. + * + * So if we're in an error state, and we're still starting up, we don't + * write anything at all. + */ + if (error && test_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags)) + return -EIO; + + if (error || + w->noflush || + (!w->must_flush && + (jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) && + test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags))) { + w->noflush = true; + SET_JSET_NO_FLUSH(w->data, true); + w->data->last_seq = 0; + w->last_seq = 0; + + j->nr_noflush_writes++; + } else { + j->last_flush_write = jiffies; + j->nr_flush_writes++; + clear_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags); + } + + return 0; +} + +void bch2_journal_write(struct closure *cl) +{ + struct journal *j = container_of(cl, struct journal, io); + struct bch_fs *c = container_of(j, struct bch_fs, journal); + struct bch_dev *ca; + struct journal_buf *w = journal_last_unwritten_buf(j); + struct bch_replicas_padded replicas; + struct bio *bio; + struct printbuf journal_debug_buf = PRINTBUF; + unsigned i, nr_rw_members = 0; + int ret; + + BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb)); + + j->write_start_time = local_clock(); -retry_alloc: spin_lock(&j->lock); - ret = journal_write_alloc(j, w); + ret = bch2_journal_write_pick_flush(j, w); + spin_unlock(&j->lock); + if (ret) + goto err; + + journal_buf_realloc(j, w); + + ret = bch2_journal_write_prep(j, w); + if (ret) + goto err; + + while (1) { + spin_lock(&j->lock); + ret = journal_write_alloc(j, w); + if (!ret || !j->can_discard) + break; - if (ret && j->can_discard) { spin_unlock(&j->lock); bch2_journal_do_discards(j); - goto retry_alloc; } - if (ret) + if (ret) { __bch2_journal_debug_to_text(&journal_debug_buf, j); + spin_unlock(&j->lock); + bch_err(c, "Unable to allocate journal write:\n%s", + journal_debug_buf.buf); + printbuf_exit(&journal_debug_buf); + goto err; + } /* * write is allocated, no longer need to account for it in @@ -1843,13 +1903,6 @@ retry_alloc: bch2_journal_space_available(j); spin_unlock(&j->lock); - if (ret) { - bch_err(c, "Unable to allocate journal write:\n%s", - journal_debug_buf.buf); - printbuf_exit(&journal_debug_buf); - goto err; - } - w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key)); if (c->opts.nochanges) @@ -1871,7 +1924,7 @@ retry_alloc: if (ret) goto err; - if (!JSET_NO_FLUSH(jset) && w->separate_flush) { + if (!JSET_NO_FLUSH(w->data) && w->separate_flush) { for_each_rw_member(ca, c, i) { percpu_ref_get(&ca->io_ref); diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index 215a653322f3..a5cc0ed195d6 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -10,17 +10,17 @@ #include "recovery.h" /* KEY_TYPE_lru is obsolete: */ -int bch2_lru_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_lru_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (!lru_pos_time(k.k->p)) { - prt_printf(err, "lru entry at time=0"); - return -BCH_ERR_invalid_bkey; - - } + int ret = 0; - return 0; + bkey_fsck_err_on(!lru_pos_time(k.k->p), c, err, + lru_entry_at_time_0, + "lru entry at time=0"); +fsck_err: + return ret; } void bch2_lru_to_text(struct printbuf *out, struct bch_fs *c, @@ -95,6 +95,7 @@ static int 
bch2_check_lru_key(struct btree_trans *trans, int ret; if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_pos), c, + lru_entry_to_invalid_bucket, "lru key points to nonexistent device:bucket %llu:%llu", alloc_pos.inode, alloc_pos.offset)) return bch2_btree_delete_at(trans, lru_iter, 0); @@ -125,7 +126,8 @@ static int bch2_check_lru_key(struct btree_trans *trans, } if (c->opts.reconstruct_alloc || - fsck_err(c, "incorrect lru entry: lru %s time %llu\n" + fsck_err(c, lru_entry_bad, + "incorrect lru entry: lru %s time %llu\n" " %s\n" " for %s", bch2_lru_types[type], diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h index be66bf9ad809..429dca816df5 100644 --- a/fs/bcachefs/lru.h +++ b/fs/bcachefs/lru.h @@ -48,7 +48,7 @@ static inline enum bch_lru_type lru_type(struct bkey_s_c l) return BCH_LRU_read; } -int bch2_lru_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_lru_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 39a14e321680..ab749bf2fcbc 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -20,6 +20,7 @@ #include "keylist.h" #include "move.h" #include "replicas.h" +#include "snapshot.h" #include "super-io.h" #include "trace.h" @@ -59,20 +60,6 @@ static void trace_move_extent_alloc_mem_fail2(struct bch_fs *c, struct bkey_s_c } } -static void progress_list_add(struct bch_fs *c, struct bch_move_stats *stats) -{ - mutex_lock(&c->data_progress_lock); - list_add(&stats->list, &c->data_progress_list); - mutex_unlock(&c->data_progress_lock); -} - -static void progress_list_del(struct bch_fs *c, struct bch_move_stats *stats) -{ - mutex_lock(&c->data_progress_lock); - list_del(&stats->list); - mutex_unlock(&c->data_progress_lock); -} - struct moving_io { struct list_head read_list; struct list_head io_list; @@ -156,35 +143,31 @@ static void move_read_endio(struct bio *bio) closure_put(&ctxt->cl); } -void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt, - struct btree_trans *trans) +void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt) { struct moving_io *io; - if (trans) - bch2_trans_unlock(trans); - while ((io = bch2_moving_ctxt_next_pending_write(ctxt))) { + bch2_trans_unlock_long(ctxt->trans); list_del(&io->read_list); move_write(io); } } -static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt, - struct btree_trans *trans) +void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt) { unsigned sectors_pending = atomic_read(&ctxt->write_sectors); - move_ctxt_wait_event(ctxt, trans, + move_ctxt_wait_event(ctxt, !atomic_read(&ctxt->write_sectors) || atomic_read(&ctxt->write_sectors) != sectors_pending); } void bch2_moving_ctxt_exit(struct moving_context *ctxt) { - struct bch_fs *c = ctxt->c; + struct bch_fs *c = ctxt->trans->c; - move_ctxt_wait_event(ctxt, NULL, list_empty(&ctxt->reads)); + move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads)); closure_sync(&ctxt->cl); EBUG_ON(atomic_read(&ctxt->write_sectors)); @@ -192,16 +175,12 @@ void bch2_moving_ctxt_exit(struct moving_context *ctxt) EBUG_ON(atomic_read(&ctxt->read_sectors)); EBUG_ON(atomic_read(&ctxt->read_ios)); - if (ctxt->stats) { - progress_list_del(c, ctxt->stats); - trace_move_data(c, - atomic64_read(&ctxt->stats->sectors_moved), - atomic64_read(&ctxt->stats->keys_moved)); - } - mutex_lock(&c->moving_context_lock); list_del(&ctxt->list); mutex_unlock(&c->moving_context_lock); + + bch2_trans_put(ctxt->trans); + 
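[Editorial aside: a structural change running through the move.c hunks here is that struct moving_context now owns its btree_trans: bch2_moving_ctxt_init() acquires the transaction and bch2_moving_ctxt_exit() puts it, so the move helpers shed their separate trans parameter. A rough, self-contained sketch of that ownership shape under placeholder names; trans_get/trans_put and the types are illustrative, not the bcachefs API.

	#include <stdlib.h>
	#include <string.h>

	struct trans { int depth; };

	static struct trans *trans_get(void)   { return calloc(1, sizeof(struct trans)); }
	static void trans_put(struct trans *t) { free(t); }

	/* The context owns the transaction: acquired in init, released in exit */
	struct moving_context {
		struct trans *trans;
	};

	static void moving_ctxt_init(struct moving_context *ctxt)
	{
		memset(ctxt, 0, sizeof(*ctxt));
		ctxt->trans = trans_get();
	}

	static void moving_ctxt_exit(struct moving_context *ctxt)
	{
		trans_put(ctxt->trans);
		memset(ctxt, 0, sizeof(*ctxt));
	}

	/* Helpers now take only the ctxt; they reach the trans through it */
	static int move_ratelimit(struct moving_context *ctxt)
	{
		return ctxt->trans ? 0 : -1;
	}

	int main(void)
	{
		struct moving_context ctxt;

		moving_ctxt_init(&ctxt);
		int ret = move_ratelimit(&ctxt);
		moving_ctxt_exit(&ctxt);
		return ret;
	}

End of aside; the patch continues below.]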
memset(ctxt, 0, sizeof(*ctxt)); } void bch2_moving_ctxt_init(struct moving_context *ctxt, @@ -213,7 +192,7 @@ void bch2_moving_ctxt_init(struct moving_context *ctxt, { memset(ctxt, 0, sizeof(*ctxt)); - ctxt->c = c; + ctxt->trans = bch2_trans_get(c); ctxt->fn = (void *) _RET_IP_; ctxt->rate = rate; ctxt->stats = stats; @@ -230,16 +209,17 @@ void bch2_moving_ctxt_init(struct moving_context *ctxt, mutex_lock(&c->moving_context_lock); list_add(&ctxt->list, &c->moving_context_list); mutex_unlock(&c->moving_context_lock); +} - if (stats) { - progress_list_add(c, stats); - stats->data_type = BCH_DATA_user; - } +void bch2_move_stats_exit(struct bch_move_stats *stats, struct bch_fs *c) +{ + trace_move_data(c, stats); } void bch2_move_stats_init(struct bch_move_stats *stats, char *name) { memset(stats, 0, sizeof(*stats)); + stats->data_type = BCH_DATA_user; scnprintf(stats->name, sizeof(stats->name), "%s", name); } @@ -286,15 +266,14 @@ static int bch2_extent_drop_ptrs(struct btree_trans *trans, bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); } -static int bch2_move_extent(struct btree_trans *trans, - struct btree_iter *iter, - struct moving_context *ctxt, - struct move_bucket_in_flight *bucket_in_flight, - struct bch_io_opts io_opts, - enum btree_id btree_id, - struct bkey_s_c k, - struct data_update_opts data_opts) +int bch2_move_extent(struct moving_context *ctxt, + struct move_bucket_in_flight *bucket_in_flight, + struct btree_iter *iter, + struct bkey_s_c k, + struct bch_io_opts io_opts, + struct data_update_opts data_opts) { + struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); struct moving_io *io; @@ -303,6 +282,8 @@ static int bch2_move_extent(struct btree_trans *trans, unsigned sectors = k.k->size, pages; int ret = -ENOMEM; + if (ctxt->stats) + ctxt->stats->pos = BBPOS(iter->btree_id, iter->pos); trace_move_extent2(c, k); bch2_data_update_opts_normalize(k, &data_opts); @@ -355,7 +336,7 @@ static int bch2_move_extent(struct btree_trans *trans, io->rbio.bio.bi_end_io = move_read_endio; ret = bch2_data_update_init(trans, ctxt, &io->write, ctxt->wp, - io_opts, data_opts, btree_id, k); + io_opts, data_opts, iter->btree_id, k); if (ret && ret != -BCH_ERR_unwritten_extent_update) goto err_free_pages; @@ -367,9 +348,11 @@ static int bch2_move_extent(struct btree_trans *trans, BUG_ON(ret); - io->write.ctxt = ctxt; io->write.op.end_io = move_write_done; + if (ctxt->rate) + bch2_ratelimit_increment(ctxt->rate, k.k->size); + if (ctxt->stats) { atomic64_inc(&ctxt->stats->keys_moved); atomic64_add(k.k->size, &ctxt->stats->sectors_moved); @@ -399,7 +382,7 @@ static int bch2_move_extent(struct btree_trans *trans, closure_get(&ctxt->cl); bch2_read_extent(trans, &io->rbio, bkey_start_pos(k.k), - btree_id, k, 0, + iter->btree_id, k, 0, BCH_READ_NODECODE| BCH_READ_LAST_FRAGMENT); return 0; @@ -413,45 +396,96 @@ err: return ret; } -static int lookup_inode(struct btree_trans *trans, struct bpos pos, - struct bch_inode_unpacked *inode) +struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, + struct per_snapshot_io_opts *io_opts, + struct bkey_s_c extent_k) +{ + struct bch_fs *c = trans->c; + u32 restart_count = trans->restart_count; + int ret = 0; + + if (io_opts->cur_inum != extent_k.k->p.inode) { + struct btree_iter iter; + struct bkey_s_c k; + + io_opts->d.nr = 0; + + for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode), + BTREE_ITER_ALL_SNAPSHOTS, k, ret) { + if (k.k->p.offset != 
extent_k.k->p.inode) + break; + + if (!bkey_is_inode(k.k)) + continue; + + struct bch_inode_unpacked inode; + BUG_ON(bch2_inode_unpack(k, &inode)); + + struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot }; + bch2_inode_opts_get(&e.io_opts, trans->c, &inode); + + ret = darray_push(&io_opts->d, e); + if (ret) + break; + } + bch2_trans_iter_exit(trans, &iter); + io_opts->cur_inum = extent_k.k->p.inode; + } + + ret = ret ?: trans_was_restarted(trans, restart_count); + if (ret) + return ERR_PTR(ret); + + if (extent_k.k->p.snapshot) { + struct snapshot_io_opts_entry *i; + darray_for_each(io_opts->d, i) + if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot)) + return &i->io_opts; + } + + return &io_opts->fs_io_opts; +} + +int bch2_move_get_io_opts_one(struct btree_trans *trans, + struct bch_io_opts *io_opts, + struct bkey_s_c extent_k) { struct btree_iter iter; struct bkey_s_c k; int ret; - bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, pos, - BTREE_ITER_ALL_SNAPSHOTS); - k = bch2_btree_iter_peek(&iter); + /* reflink btree? */ + if (!extent_k.k->p.inode) { + *io_opts = bch2_opts_to_inode_opts(trans->c->opts); + return 0; + } + + k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, + SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot), + BTREE_ITER_CACHED); ret = bkey_err(k); - if (ret) - goto err; + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + return ret; - if (!k.k || !bkey_eq(k.k->p, pos)) { - ret = -BCH_ERR_ENOENT_inode; - goto err; + if (!ret && bkey_is_inode(k.k)) { + struct bch_inode_unpacked inode; + bch2_inode_unpack(k, &inode); + bch2_inode_opts_get(io_opts, trans->c, &inode); + } else { + *io_opts = bch2_opts_to_inode_opts(trans->c->opts); } - ret = bkey_is_inode(k.k) ? 0 : -EIO; - if (ret) - goto err; - - ret = bch2_inode_unpack(k, inode); - if (ret) - goto err; -err: bch2_trans_iter_exit(trans, &iter); - return ret; + return 0; } -static int move_ratelimit(struct btree_trans *trans, - struct moving_context *ctxt) +int bch2_move_ratelimit(struct moving_context *ctxt) { - struct bch_fs *c = trans->c; + struct bch_fs *c = ctxt->trans->c; u64 delay; - if (ctxt->wait_on_copygc) { - bch2_trans_unlock(trans); + if (ctxt->wait_on_copygc && !c->copygc_running) { + bch2_trans_unlock_long(ctxt->trans); wait_event_killable(c->copygc_running_wq, !c->copygc_running || kthread_should_stop()); @@ -460,8 +494,12 @@ static int move_ratelimit(struct btree_trans *trans, do { delay = ctxt->rate ? 
bch2_ratelimit_delay(ctxt->rate) : 0; + if (delay) { - bch2_trans_unlock(trans); + if (delay > HZ / 10) + bch2_trans_unlock_long(ctxt->trans); + else + bch2_trans_unlock(ctxt->trans); set_current_state(TASK_INTERRUPTIBLE); } @@ -474,7 +512,7 @@ static int move_ratelimit(struct btree_trans *trans, schedule_timeout(delay); if (unlikely(freezing(current))) { - move_ctxt_wait_event(ctxt, trans, list_empty(&ctxt->reads)); + move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads)); try_to_freeze(); } } while (delay); @@ -483,7 +521,7 @@ static int move_ratelimit(struct btree_trans *trans, * XXX: these limits really ought to be per device, SSDs and hard drives * will want different limits */ - move_ctxt_wait_event(ctxt, trans, + move_ctxt_wait_event(ctxt, atomic_read(&ctxt->write_sectors) < c->opts.move_bytes_in_flight >> 9 && atomic_read(&ctxt->read_sectors) < c->opts.move_bytes_in_flight >> 9 && atomic_read(&ctxt->write_ios) < c->opts.move_ios_in_flight && @@ -492,52 +530,28 @@ static int move_ratelimit(struct btree_trans *trans, return 0; } -static int move_get_io_opts(struct btree_trans *trans, - struct bch_io_opts *io_opts, - struct bkey_s_c k, u64 *cur_inum) -{ - struct bch_inode_unpacked inode; - int ret; - - if (*cur_inum == k.k->p.inode) - return 0; - - ret = lookup_inode(trans, - SPOS(0, k.k->p.inode, k.k->p.snapshot), - &inode); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - return ret; - - if (!ret) - bch2_inode_opts_get(io_opts, trans->c, &inode); - else - *io_opts = bch2_opts_to_inode_opts(trans->c->opts); - *cur_inum = k.k->p.inode; - return 0; -} - -static int __bch2_move_data(struct moving_context *ctxt, - struct bpos start, - struct bpos end, - move_pred_fn pred, void *arg, - enum btree_id btree_id) +static int bch2_move_data_btree(struct moving_context *ctxt, + struct bpos start, + struct bpos end, + move_pred_fn pred, void *arg, + enum btree_id btree_id) { - struct bch_fs *c = ctxt->c; - struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); + struct btree_trans *trans = ctxt->trans; + struct bch_fs *c = trans->c; + struct per_snapshot_io_opts snapshot_io_opts; + struct bch_io_opts *io_opts; struct bkey_buf sk; - struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; struct data_update_opts data_opts; - u64 cur_inum = U64_MAX; int ret = 0, ret2; + per_snapshot_io_opts_init(&snapshot_io_opts, c); bch2_bkey_buf_init(&sk); if (ctxt->stats) { ctxt->stats->data_type = BCH_DATA_user; - ctxt->stats->btree_id = btree_id; - ctxt->stats->pos = start; + ctxt->stats->pos = BBPOS(btree_id, start); } bch2_trans_iter_init(trans, &iter, btree_id, start, @@ -547,7 +561,7 @@ static int __bch2_move_data(struct moving_context *ctxt, if (ctxt->rate) bch2_ratelimit_reset(ctxt->rate); - while (!move_ratelimit(trans, ctxt)) { + while (!bch2_move_ratelimit(ctxt)) { bch2_trans_begin(trans); k = bch2_btree_iter_peek(&iter); @@ -564,17 +578,18 @@ static int __bch2_move_data(struct moving_context *ctxt, break; if (ctxt->stats) - ctxt->stats->pos = iter.pos; + ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos); if (!bkey_extent_is_direct_data(k.k)) goto next_nondata; - ret = move_get_io_opts(trans, &io_opts, k, &cur_inum); + io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, k); + ret = PTR_ERR_OR_ZERO(io_opts); if (ret) continue; memset(&data_opts, 0, sizeof(data_opts)); - if (!pred(c, arg, k, &io_opts, &data_opts)) + if (!pred(c, arg, k, io_opts, &data_opts)) goto next; /* @@ -584,24 +599,20 @@ static int __bch2_move_data(struct moving_context 
*ctxt, bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); - ret2 = bch2_move_extent(trans, &iter, ctxt, NULL, - io_opts, btree_id, k, data_opts); + ret2 = bch2_move_extent(ctxt, NULL, &iter, k, *io_opts, data_opts); if (ret2) { if (bch2_err_matches(ret2, BCH_ERR_transaction_restart)) continue; if (ret2 == -ENOMEM) { /* memory allocation failure, wait for some IO to finish */ - bch2_move_ctxt_wait_for_io(ctxt, trans); + bch2_move_ctxt_wait_for_io(ctxt); continue; } /* XXX signal failure */ goto next; } - - if (ctxt->rate) - bch2_ratelimit_increment(ctxt->rate, k.k->size); next: if (ctxt->stats) atomic64_add(k.k->size, &ctxt->stats->sectors_seen); @@ -610,59 +621,68 @@ next_nondata: } bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); bch2_bkey_buf_exit(&sk, c); + per_snapshot_io_opts_exit(&snapshot_io_opts); return ret; } -int bch2_move_data(struct bch_fs *c, - enum btree_id start_btree_id, struct bpos start_pos, - enum btree_id end_btree_id, struct bpos end_pos, - struct bch_ratelimit *rate, - struct bch_move_stats *stats, - struct write_point_specifier wp, - bool wait_on_copygc, - move_pred_fn pred, void *arg) +int __bch2_move_data(struct moving_context *ctxt, + struct bbpos start, + struct bbpos end, + move_pred_fn pred, void *arg) { - struct moving_context ctxt; + struct bch_fs *c = ctxt->trans->c; enum btree_id id; int ret = 0; - bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); - - for (id = start_btree_id; - id <= min_t(unsigned, end_btree_id, btree_id_nr_alive(c) - 1); + for (id = start.btree; + id <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1); id++) { - stats->btree_id = id; + ctxt->stats->pos = BBPOS(id, POS_MIN); - if (id != BTREE_ID_extents && - id != BTREE_ID_reflink) + if (!btree_type_has_ptrs(id) || + !bch2_btree_id_root(c, id)->b) continue; - if (!bch2_btree_id_root(c, id)->b) - continue; - - ret = __bch2_move_data(&ctxt, - id == start_btree_id ? start_pos : POS_MIN, - id == end_btree_id ? end_pos : POS_MAX, + ret = bch2_move_data_btree(ctxt, + id == start.btree ? start.pos : POS_MIN, + id == end.btree ? 
end.pos : POS_MAX, pred, arg, id); if (ret) break; } + return ret; +} + +int bch2_move_data(struct bch_fs *c, + struct bbpos start, + struct bbpos end, + struct bch_ratelimit *rate, + struct bch_move_stats *stats, + struct write_point_specifier wp, + bool wait_on_copygc, + move_pred_fn pred, void *arg) +{ + + struct moving_context ctxt; + int ret; + + bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); + ret = __bch2_move_data(&ctxt, start, end, pred, arg); bch2_moving_ctxt_exit(&ctxt); return ret; } -int __bch2_evacuate_bucket(struct btree_trans *trans, - struct moving_context *ctxt, +int __bch2_evacuate_bucket(struct moving_context *ctxt, struct move_bucket_in_flight *bucket_in_flight, struct bpos bucket, int gen, struct data_update_opts _data_opts) { - struct bch_fs *c = ctxt->c; + struct btree_trans *trans = ctxt->trans; + struct bch_fs *c = trans->c; struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); struct btree_iter iter; struct bkey_buf sk; @@ -673,7 +693,6 @@ int __bch2_evacuate_bucket(struct btree_trans *trans, struct data_update_opts data_opts; unsigned dirty_sectors, bucket_size; u64 fragmentation; - u64 cur_inum = U64_MAX; struct bpos bp_pos = POS_MIN; int ret = 0; @@ -708,7 +727,7 @@ int __bch2_evacuate_bucket(struct btree_trans *trans, goto err; } - while (!(ret = move_ratelimit(trans, ctxt))) { + while (!(ret = bch2_move_ratelimit(ctxt))) { bch2_trans_begin(trans); ret = bch2_get_next_backpointer(trans, bucket, gen, @@ -737,7 +756,7 @@ int __bch2_evacuate_bucket(struct btree_trans *trans, bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); - ret = move_get_io_opts(trans, &io_opts, k, &cur_inum); + ret = bch2_move_get_io_opts_one(trans, &io_opts, k); if (ret) { bch2_trans_iter_exit(trans, &iter); continue; @@ -758,23 +777,20 @@ int __bch2_evacuate_bucket(struct btree_trans *trans, i++; } - ret = bch2_move_extent(trans, &iter, ctxt, - bucket_in_flight, - io_opts, bp.btree_id, k, data_opts); + ret = bch2_move_extent(ctxt, bucket_in_flight, + &iter, k, io_opts, data_opts); bch2_trans_iter_exit(trans, &iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret == -ENOMEM) { /* memory allocation failure, wait for some IO to finish */ - bch2_move_ctxt_wait_for_io(ctxt, trans); + bch2_move_ctxt_wait_for_io(ctxt); continue; } if (ret) goto err; - if (ctxt->rate) - bch2_ratelimit_increment(ctxt->rate, k.k->size); if (ctxt->stats) atomic64_add(k.k->size, &ctxt->stats->sectors_seen); } else { @@ -825,14 +841,12 @@ int bch2_evacuate_bucket(struct bch_fs *c, struct write_point_specifier wp, bool wait_on_copygc) { - struct btree_trans *trans = bch2_trans_get(c); struct moving_context ctxt; int ret; bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); - ret = __bch2_evacuate_bucket(trans, &ctxt, NULL, bucket, gen, data_opts); + ret = __bch2_evacuate_bucket(&ctxt, NULL, bucket, gen, data_opts); bch2_moving_ctxt_exit(&ctxt); - bch2_trans_put(trans); return ret; } @@ -849,21 +863,25 @@ static int bch2_move_btree(struct bch_fs *c, { bool kthread = (current->flags & PF_KTHREAD) != 0; struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); - struct btree_trans *trans = bch2_trans_get(c); + struct moving_context ctxt; + struct btree_trans *trans; struct btree_iter iter; struct btree *b; enum btree_id id; struct data_update_opts data_opts; int ret = 0; - progress_list_add(c, stats); + bch2_moving_ctxt_init(&ctxt, c, NULL, stats, + writepoint_ptr(&c->btree_write_point), + true); + trans = ctxt.trans; stats->data_type = 
BCH_DATA_btree; for (id = start_btree_id; id <= min_t(unsigned, end_btree_id, btree_id_nr_alive(c) - 1); id++) { - stats->btree_id = id; + stats->pos = BBPOS(id, POS_MIN); if (!bch2_btree_id_root(c, id)->b) continue; @@ -882,7 +900,7 @@ retry: bpos_cmp(b->key.k.p, end_pos)) > 0) break; - stats->pos = iter.pos; + stats->pos = BBPOS(iter.btree_id, iter.pos); if (!pred(c, arg, b, &io_opts, &data_opts)) goto next; @@ -904,14 +922,10 @@ next: break; } - bch2_trans_put(trans); - - if (ret) - bch_err_fn(c, ret); - + bch_err_fn(c, ret); + bch2_moving_ctxt_exit(&ctxt); bch2_btree_interior_updates_flush(c); - progress_list_del(c, stats); return ret; } @@ -1032,8 +1046,7 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats) mutex_unlock(&c->sb_lock); } - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } @@ -1056,14 +1069,16 @@ int bch2_data_job(struct bch_fs *c, ret = bch2_replicas_gc2(c) ?: ret; ret = bch2_move_data(c, - op.start_btree, op.start_pos, - op.end_btree, op.end_pos, + (struct bbpos) { op.start_btree, op.start_pos }, + (struct bbpos) { op.end_btree, op.end_pos }, NULL, stats, writepoint_hashed((unsigned long) current), true, rereplicate_pred, c) ?: ret; ret = bch2_replicas_gc2(c) ?: ret; + + bch2_move_stats_exit(stats, c); break; case BCH_DATA_OP_MIGRATE: if (op.migrate.dev >= c->sb.nr_devices) @@ -1080,18 +1095,21 @@ int bch2_data_job(struct bch_fs *c, ret = bch2_replicas_gc2(c) ?: ret; ret = bch2_move_data(c, - op.start_btree, op.start_pos, - op.end_btree, op.end_pos, + (struct bbpos) { op.start_btree, op.start_pos }, + (struct bbpos) { op.end_btree, op.end_pos }, NULL, stats, writepoint_hashed((unsigned long) current), true, migrate_pred, &op) ?: ret; ret = bch2_replicas_gc2(c) ?: ret; + + bch2_move_stats_exit(stats, c); break; case BCH_DATA_OP_REWRITE_OLD_NODES: bch2_move_stats_init(stats, "rewrite_old_nodes"); ret = bch2_scan_old_btree_nodes(c, stats); + bch2_move_stats_exit(stats, c); break; default: ret = -EINVAL; @@ -1100,19 +1118,43 @@ int bch2_data_job(struct bch_fs *c, return ret; } -static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, struct moving_context *ctxt) +void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats) { - struct bch_move_stats *stats = ctxt->stats; - struct moving_io *io; + prt_printf(out, "%s: data type=%s pos=", + stats->name, + bch2_data_types[stats->data_type]); + bch2_bbpos_to_text(out, stats->pos); + prt_newline(out); + printbuf_indent_add(out, 2); + + prt_str(out, "keys moved: "); + prt_u64(out, atomic64_read(&stats->keys_moved)); + prt_newline(out); + + prt_str(out, "keys raced: "); + prt_u64(out, atomic64_read(&stats->keys_raced)); + prt_newline(out); + + prt_str(out, "bytes seen: "); + prt_human_readable_u64(out, atomic64_read(&stats->sectors_seen) << 9); + prt_newline(out); - prt_printf(out, "%s (%ps):", stats->name, ctxt->fn); + prt_str(out, "bytes moved: "); + prt_human_readable_u64(out, atomic64_read(&stats->sectors_moved) << 9); prt_newline(out); - prt_printf(out, " data type %s btree_id %s position: ", - bch2_data_types[stats->data_type], - bch2_btree_ids[stats->btree_id]); - bch2_bpos_to_text(out, stats->pos); + prt_str(out, "bytes raced: "); + prt_human_readable_u64(out, atomic64_read(&stats->sectors_raced) << 9); prt_newline(out); + + printbuf_indent_sub(out, 2); +} + +static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, struct moving_context *ctxt) +{ + struct moving_io *io; + + bch2_move_stats_to_text(out, ctxt->stats); 
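+	/* shared stats are printed by the helper above; the indented lines below are this context's in-flight read/write state */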
printbuf_indent_add(out, 2); prt_printf(out, "reads: ios %u/%u sectors %u/%u", @@ -1153,7 +1195,4 @@ void bch2_fs_move_init(struct bch_fs *c) { INIT_LIST_HEAD(&c->moving_context_list); mutex_init(&c->moving_context_lock); - - INIT_LIST_HEAD(&c->data_progress_list); - mutex_init(&c->data_progress_lock); } diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index cbdd58db8782..07cf9d42643b 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -2,6 +2,7 @@ #ifndef _BCACHEFS_MOVE_H #define _BCACHEFS_MOVE_H +#include "bbpos.h" #include "bcachefs_ioctl.h" #include "btree_iter.h" #include "buckets.h" @@ -11,7 +12,7 @@ struct bch_read_bio; struct moving_context { - struct bch_fs *c; + struct btree_trans *trans; struct list_head list; void *fn; @@ -37,13 +38,14 @@ struct moving_context { wait_queue_head_t wait; }; -#define move_ctxt_wait_event(_ctxt, _trans, _cond) \ +#define move_ctxt_wait_event(_ctxt, _cond) \ do { \ bool cond_finished = false; \ - bch2_moving_ctxt_do_pending_writes(_ctxt, _trans); \ + bch2_moving_ctxt_do_pending_writes(_ctxt); \ \ if (_cond) \ break; \ + bch2_trans_unlock_long((_ctxt)->trans); \ __wait_event((_ctxt)->wait, \ bch2_moving_ctxt_next_pending_write(_ctxt) || \ (cond_finished = (_cond))); \ @@ -59,22 +61,60 @@ void bch2_moving_ctxt_init(struct moving_context *, struct bch_fs *, struct bch_ratelimit *, struct bch_move_stats *, struct write_point_specifier, bool); struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *); -void bch2_moving_ctxt_do_pending_writes(struct moving_context *, - struct btree_trans *); +void bch2_moving_ctxt_do_pending_writes(struct moving_context *); +void bch2_move_ctxt_wait_for_io(struct moving_context *); +int bch2_move_ratelimit(struct moving_context *); + +/* Inodes in different snapshots may have different IO options: */ +struct snapshot_io_opts_entry { + u32 snapshot; + struct bch_io_opts io_opts; +}; + +struct per_snapshot_io_opts { + u64 cur_inum; + struct bch_io_opts fs_io_opts; + DARRAY(struct snapshot_io_opts_entry) d; +}; + +static inline void per_snapshot_io_opts_init(struct per_snapshot_io_opts *io_opts, struct bch_fs *c) +{ + memset(io_opts, 0, sizeof(*io_opts)); + io_opts->fs_io_opts = bch2_opts_to_inode_opts(c->opts); +} + +static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opts) +{ + darray_exit(&io_opts->d); +} + +struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *, + struct per_snapshot_io_opts *, struct bkey_s_c); +int bch2_move_get_io_opts_one(struct btree_trans *, struct bch_io_opts *, struct bkey_s_c); int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *); +int bch2_move_extent(struct moving_context *, + struct move_bucket_in_flight *, + struct btree_iter *, + struct bkey_s_c, + struct bch_io_opts, + struct data_update_opts); + +int __bch2_move_data(struct moving_context *, + struct bbpos, + struct bbpos, + move_pred_fn, void *); int bch2_move_data(struct bch_fs *, - enum btree_id, struct bpos, - enum btree_id, struct bpos, + struct bbpos start, + struct bbpos end, struct bch_ratelimit *, struct bch_move_stats *, struct write_point_specifier, bool, move_pred_fn, void *); -int __bch2_evacuate_bucket(struct btree_trans *, - struct moving_context *, +int __bch2_evacuate_bucket(struct moving_context *, struct move_bucket_in_flight *, struct bpos, int, struct data_update_opts); @@ -88,7 +128,10 @@ int bch2_data_job(struct bch_fs *, struct bch_move_stats *, struct bch_ioctl_data); -void bch2_move_stats_init(struct bch_move_stats *stats, char 
*name); +void bch2_move_stats_to_text(struct printbuf *, struct bch_move_stats *); +void bch2_move_stats_exit(struct bch_move_stats *, struct bch_fs *); +void bch2_move_stats_init(struct bch_move_stats *, char *); + void bch2_fs_moving_ctxts_to_text(struct printbuf *, struct bch_fs *); void bch2_fs_move_init(struct bch_fs *); diff --git a/fs/bcachefs/move_types.h b/fs/bcachefs/move_types.h index baf1f8570b3f..e22841ef31e4 100644 --- a/fs/bcachefs/move_types.h +++ b/fs/bcachefs/move_types.h @@ -2,17 +2,17 @@ #ifndef _BCACHEFS_MOVE_TYPES_H #define _BCACHEFS_MOVE_TYPES_H +#include "bbpos_types.h" + struct bch_move_stats { enum bch_data_type data_type; - enum btree_id btree_id; - struct bpos pos; - struct list_head list; + struct bbpos pos; char name[32]; atomic64_t keys_moved; atomic64_t keys_raced; - atomic64_t sectors_moved; atomic64_t sectors_seen; + atomic64_t sectors_moved; atomic64_t sectors_raced; }; diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 4017120baeee..0a0576326c5b 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -101,8 +101,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, return ret; } -static void move_buckets_wait(struct btree_trans *trans, - struct moving_context *ctxt, +static void move_buckets_wait(struct moving_context *ctxt, struct buckets_in_flight *list, bool flush) { @@ -111,7 +110,7 @@ static void move_buckets_wait(struct btree_trans *trans, while ((i = list->first)) { if (flush) - move_ctxt_wait_event(ctxt, trans, !atomic_read(&i->count)); + move_ctxt_wait_event(ctxt, !atomic_read(&i->count)); if (atomic_read(&i->count)) break; @@ -129,7 +128,7 @@ static void move_buckets_wait(struct btree_trans *trans, kfree(i); } - bch2_trans_unlock(trans); + bch2_trans_unlock_long(ctxt->trans); } static bool bucket_in_flight(struct buckets_in_flight *list, @@ -140,11 +139,11 @@ static bool bucket_in_flight(struct buckets_in_flight *list, typedef DARRAY(struct move_bucket) move_buckets; -static int bch2_copygc_get_buckets(struct btree_trans *trans, - struct moving_context *ctxt, +static int bch2_copygc_get_buckets(struct moving_context *ctxt, struct buckets_in_flight *buckets_in_flight, move_buckets *buckets) { + struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_s_c k; @@ -152,7 +151,7 @@ static int bch2_copygc_get_buckets(struct btree_trans *trans, size_t saw = 0, in_flight = 0, not_movable = 0, sectors = 0; int ret; - move_buckets_wait(trans, ctxt, buckets_in_flight, false); + move_buckets_wait(ctxt, buckets_in_flight, false); ret = bch2_btree_write_buffer_flush(trans); if (bch2_fs_fatal_err_on(ret, c, "%s: error %s from bch2_btree_write_buffer_flush()", @@ -188,10 +187,11 @@ static int bch2_copygc_get_buckets(struct btree_trans *trans, } noinline -static int bch2_copygc(struct btree_trans *trans, - struct moving_context *ctxt, - struct buckets_in_flight *buckets_in_flight) +static int bch2_copygc(struct moving_context *ctxt, + struct buckets_in_flight *buckets_in_flight, + bool *did_work) { + struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; struct data_update_opts data_opts = { .btree_insert_flags = BCH_WATERMARK_copygc, @@ -202,7 +202,7 @@ static int bch2_copygc(struct btree_trans *trans, u64 moved = atomic64_read(&ctxt->stats->sectors_moved); int ret = 0; - ret = bch2_copygc_get_buckets(trans, ctxt, buckets_in_flight, &buckets); + ret = bch2_copygc_get_buckets(ctxt, buckets_in_flight, &buckets); if (ret) goto err; @@ -221,10 +221,12 @@ static 
int bch2_copygc(struct btree_trans *trans, break; } - ret = __bch2_evacuate_bucket(trans, ctxt, f, f->bucket.k.bucket, + ret = __bch2_evacuate_bucket(ctxt, f, f->bucket.k.bucket, f->bucket.k.gen, data_opts); if (ret) goto err; + + *did_work = true; } err: darray_exit(&buckets); @@ -300,24 +302,24 @@ void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c) static int bch2_copygc_thread(void *arg) { struct bch_fs *c = arg; - struct btree_trans *trans; struct moving_context ctxt; struct bch_move_stats move_stats; struct io_clock *clock = &c->io_clock[WRITE]; - struct buckets_in_flight buckets; + struct buckets_in_flight *buckets; u64 last, wait; int ret = 0; - memset(&buckets, 0, sizeof(buckets)); - - ret = rhashtable_init(&buckets.table, &bch_move_bucket_params); + buckets = kzalloc(sizeof(struct buckets_in_flight), GFP_KERNEL); + if (!buckets) + return -ENOMEM; + ret = rhashtable_init(&buckets->table, &bch_move_bucket_params); if (ret) { + kfree(buckets); bch_err_msg(c, ret, "allocating copygc buckets in flight"); return ret; } set_freezable(); - trans = bch2_trans_get(c); bch2_move_stats_init(&move_stats, "copygc"); bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats, @@ -325,16 +327,18 @@ static int bch2_copygc_thread(void *arg) false); while (!ret && !kthread_should_stop()) { - bch2_trans_unlock(trans); + bool did_work = false; + + bch2_trans_unlock_long(ctxt.trans); cond_resched(); if (!c->copy_gc_enabled) { - move_buckets_wait(trans, &ctxt, &buckets, true); + move_buckets_wait(&ctxt, buckets, true); kthread_wait_freezable(c->copy_gc_enabled); } if (unlikely(freezing(current))) { - move_buckets_wait(trans, &ctxt, &buckets, true); + move_buckets_wait(&ctxt, buckets, true); __refrigerator(false); continue; } @@ -345,7 +349,7 @@ static int bch2_copygc_thread(void *arg) if (wait > clock->max_slop) { c->copygc_wait_at = last; c->copygc_wait = last + wait; - move_buckets_wait(trans, &ctxt, &buckets, true); + move_buckets_wait(&ctxt, buckets, true); trace_and_count(c, copygc_wait, c, wait, last + wait); bch2_kthread_io_clock_wait(clock, last + wait, MAX_SCHEDULE_TIMEOUT); @@ -355,16 +359,29 @@ static int bch2_copygc_thread(void *arg) c->copygc_wait = 0; c->copygc_running = true; - ret = bch2_copygc(trans, &ctxt, &buckets); + ret = bch2_copygc(&ctxt, buckets, &did_work); c->copygc_running = false; wake_up(&c->copygc_running_wq); + + if (!wait && !did_work) { + u64 min_member_capacity = bch2_min_rw_member_capacity(c); + + if (min_member_capacity == U64_MAX) + min_member_capacity = 128 * 2048; + + bch2_trans_unlock_long(ctxt.trans); + bch2_kthread_io_clock_wait(clock, last + (min_member_capacity >> 6), + MAX_SCHEDULE_TIMEOUT); + } } - move_buckets_wait(trans, &ctxt, &buckets, true); - rhashtable_destroy(&buckets.table); - bch2_trans_put(trans); + move_buckets_wait(&ctxt, buckets, true); + + rhashtable_destroy(&buckets->table); + kfree(buckets); bch2_moving_ctxt_exit(&ctxt); + bch2_move_stats_exit(&move_stats, c); return 0; } diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index 232f50c73a94..8dd4046cca41 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -12,11 +12,6 @@ #define x(t, n, ...) 
[n] = #t, -const char * const bch2_iops_measurements[] = { - BCH_IOPS_MEASUREMENTS() - NULL -}; - const char * const bch2_error_actions[] = { BCH_ERROR_ACTIONS() NULL @@ -42,9 +37,8 @@ const char * const bch2_sb_compat[] = { NULL }; -const char * const bch2_btree_ids[] = { +const char * const __bch2_btree_ids[] = { BCH_BTREE_IDS() - "interior btree node", NULL }; @@ -271,14 +265,14 @@ int bch2_opt_validate(const struct bch_option *opt, u64 v, struct printbuf *err) if (err) prt_printf(err, "%s: too small (min %llu)", opt->attr.name, opt->min); - return -ERANGE; + return -BCH_ERR_ERANGE_option_too_small; } if (opt->max && v >= opt->max) { if (err) prt_printf(err, "%s: too big (max %llu)", opt->attr.name, opt->max); - return -ERANGE; + return -BCH_ERR_ERANGE_option_too_big; } if ((opt->flags & OPT_SB_FIELD_SECTORS) && (v & 511)) { @@ -295,6 +289,9 @@ int bch2_opt_validate(const struct bch_option *opt, u64 v, struct printbuf *err) return -EINVAL; } + if (opt->fn.validate) + return opt->fn.validate(v, err); + return 0; } diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 55014336c5f7..8526f177450a 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -10,13 +10,12 @@ struct bch_fs; -extern const char * const bch2_iops_measurements[]; extern const char * const bch2_error_actions[]; extern const char * const bch2_fsck_fix_opts[]; extern const char * const bch2_version_upgrade_opts[]; extern const char * const bch2_sb_features[]; extern const char * const bch2_sb_compat[]; -extern const char * const bch2_btree_ids[]; +extern const char * const __bch2_btree_ids[]; extern const char * const bch2_csum_types[]; extern const char * const bch2_csum_opts[]; extern const char * const bch2_compression_types[]; @@ -74,6 +73,7 @@ enum opt_type { struct bch_opt_fn { int (*parse)(struct bch_fs *, const char *, u64 *, struct printbuf *); void (*to_text)(struct printbuf *, struct bch_fs *, struct bch_sb *, u64); + int (*validate)(u64, struct printbuf *); }; /** diff --git a/fs/bcachefs/printbuf.c b/fs/bcachefs/printbuf.c index de41f9a14492..5e653eb81d54 100644 --- a/fs/bcachefs/printbuf.c +++ b/fs/bcachefs/printbuf.c @@ -415,11 +415,11 @@ void bch2_prt_bitflags(struct printbuf *out, while (list[nr]) nr++; - while (flags && (bit = __ffs(flags)) < nr) { + while (flags && (bit = __ffs64(flags)) < nr) { if (!first) bch2_prt_printf(out, ","); first = false; bch2_prt_printf(out, "%s", list[bit]); - flags ^= 1 << bit; + flags ^= BIT_ULL(bit); } } diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index cb68ae44d597..a54647c36b85 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -59,17 +59,18 @@ const struct bch_sb_field_ops bch_sb_field_ops_quota = { .to_text = bch2_sb_quota_to_text, }; -int bch2_quota_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_quota_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (k.k->p.inode >= QTYP_NR) { - prt_printf(err, "invalid quota type (%llu >= %u)", - k.k->p.inode, QTYP_NR); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; - return 0; + bkey_fsck_err_on(k.k->p.inode >= QTYP_NR, c, err, + quota_type_invalid, + "invalid quota type (%llu >= %u)", + k.k->p.inode, QTYP_NR); +fsck_err: + return ret; } void bch2_quota_to_text(struct printbuf *out, struct bch_fs *c, diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h index 2f463874a362..884f601f41c4 100644 --- a/fs/bcachefs/quota.h +++ b/fs/bcachefs/quota.h @@ -8,7 +8,7 @@ enum bkey_invalid_flags; extern const struct bch_sb_field_ops 
bch_sb_field_ops_quota; -int bch2_quota_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_quota_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 568f1e8e7507..3319190b8d9c 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -1,15 +1,21 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "alloc_background.h" #include "alloc_foreground.h" #include "btree_iter.h" +#include "btree_update.h" +#include "btree_write_buffer.h" #include "buckets.h" #include "clock.h" #include "compress.h" #include "disk_groups.h" #include "errcode.h" +#include "error.h" +#include "inode.h" #include "move.h" #include "rebalance.h" +#include "subvolume.h" #include "super-io.h" #include "trace.h" @@ -17,302 +23,396 @@ #include <linux/kthread.h> #include <linux/sched/cputime.h> -/* - * Check if an extent should be moved: - * returns -1 if it should not be moved, or - * device of pointer that should be moved, if known, or INT_MAX if unknown - */ -static bool rebalance_pred(struct bch_fs *c, void *arg, - struct bkey_s_c k, - struct bch_io_opts *io_opts, - struct data_update_opts *data_opts) -{ - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - unsigned i; - - data_opts->rewrite_ptrs = 0; - data_opts->target = io_opts->background_target; - data_opts->extra_replicas = 0; - data_opts->btree_insert_flags = 0; - - if (io_opts->background_compression && - !bch2_bkey_is_incompressible(k)) { - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - - i = 0; - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - if (!p.ptr.cached && - p.crc.compression_type != - bch2_compression_opt_to_type(io_opts->background_compression)) - data_opts->rewrite_ptrs |= 1U << i; - i++; - } - } +#define REBALANCE_WORK_SCAN_OFFSET (U64_MAX - 1) - if (io_opts->background_target) { - const struct bch_extent_ptr *ptr; +static const char * const bch2_rebalance_state_strs[] = { +#define x(t) #t, + BCH_REBALANCE_STATES() + NULL +#undef x +}; - i = 0; - bkey_for_each_ptr(ptrs, ptr) { - if (!ptr->cached && - !bch2_dev_in_target(c, ptr->dev, io_opts->background_target) && - bch2_target_accepts_data(c, BCH_DATA_user, io_opts->background_target)) - data_opts->rewrite_ptrs |= 1U << i; - i++; - } - } +static int __bch2_set_rebalance_needs_scan(struct btree_trans *trans, u64 inum) +{ + struct btree_iter iter; + struct bkey_s_c k; + struct bkey_i_cookie *cookie; + u64 v; + int ret; - return data_opts->rewrite_ptrs != 0; + bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work, + SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX), + BTREE_ITER_INTENT); + k = bch2_btree_iter_peek_slot(&iter); + ret = bkey_err(k); + if (ret) + goto err; + + v = k.k->type == KEY_TYPE_cookie + ? 
le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie) + : 0; + + cookie = bch2_trans_kmalloc(trans, sizeof(*cookie)); + ret = PTR_ERR_OR_ZERO(cookie); + if (ret) + goto err; + + bkey_cookie_init(&cookie->k_i); + cookie->k.p = iter.pos; + cookie->v.cookie = cpu_to_le64(v + 1); + + ret = bch2_trans_update(trans, &iter, &cookie->k_i, 0); +err: + bch2_trans_iter_exit(trans, &iter); + return ret; } -void bch2_rebalance_add_key(struct bch_fs *c, - struct bkey_s_c k, - struct bch_io_opts *io_opts) +int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum) { - struct data_update_opts update_opts = { 0 }; - struct bkey_ptrs_c ptrs; - const struct bch_extent_ptr *ptr; - unsigned i; - - if (!rebalance_pred(c, NULL, k, io_opts, &update_opts)) - return; - - i = 0; - ptrs = bch2_bkey_ptrs_c(k); - bkey_for_each_ptr(ptrs, ptr) { - if ((1U << i) && update_opts.rewrite_ptrs) - if (atomic64_add_return(k.k->size, - &bch_dev_bkey_exists(c, ptr->dev)->rebalance_work) == - k.k->size) - rebalance_wakeup(c); - i++; - } + int ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, + __bch2_set_rebalance_needs_scan(trans, inum)); + rebalance_wakeup(c); + return ret; } -void bch2_rebalance_add_work(struct bch_fs *c, u64 sectors) +int bch2_set_fs_needs_rebalance(struct bch_fs *c) { - if (atomic64_add_return(sectors, &c->rebalance.work_unknown_dev) == - sectors) - rebalance_wakeup(c); + return bch2_set_rebalance_needs_scan(c, 0); } -struct rebalance_work { - int dev_most_full_idx; - unsigned dev_most_full_percent; - u64 dev_most_full_work; - u64 dev_most_full_capacity; - u64 total_work; -}; +static int bch2_clear_rebalance_needs_scan(struct btree_trans *trans, u64 inum, u64 cookie) +{ + struct btree_iter iter; + struct bkey_s_c k; + u64 v; + int ret; + + bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work, + SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX), + BTREE_ITER_INTENT); + k = bch2_btree_iter_peek_slot(&iter); + ret = bkey_err(k); + if (ret) + goto err; + + v = k.k->type == KEY_TYPE_cookie + ? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie) + : 0; + + if (v == cookie) + ret = bch2_btree_delete_at(trans, &iter, 0); +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} -static void rebalance_work_accumulate(struct rebalance_work *w, - u64 dev_work, u64 unknown_dev, u64 capacity, int idx) +static struct bkey_s_c next_rebalance_entry(struct btree_trans *trans, + struct btree_iter *work_iter) { - unsigned percent_full; - u64 work = dev_work + unknown_dev; + return !kthread_should_stop() + ? bch2_btree_iter_peek(work_iter) + : bkey_s_c_null; +} - /* avoid divide by 0 */ - if (!capacity) - return; +static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k) +{ + struct bkey_i *n = bch2_bkey_make_mut(trans, iter, &k, 0); + int ret = PTR_ERR_OR_ZERO(n); + if (ret) + return ret; + + extent_entry_drop(bkey_i_to_s(n), + (void *) bch2_bkey_rebalance_opts(bkey_i_to_s_c(n))); + return bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); +} + +static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, + struct bpos work_pos, + struct btree_iter *extent_iter, + struct data_update_opts *data_opts) +{ + struct bch_fs *c = trans->c; + struct bkey_s_c k; + + bch2_trans_iter_exit(trans, extent_iter); + bch2_trans_iter_init(trans, extent_iter, + work_pos.inode ? 
BTREE_ID_extents : BTREE_ID_reflink, + work_pos, + BTREE_ITER_ALL_SNAPSHOTS); + k = bch2_btree_iter_peek_slot(extent_iter); + if (bkey_err(k)) + return k; + + const struct bch_extent_rebalance *r = k.k ? bch2_bkey_rebalance_opts(k) : NULL; + if (!r) { + /* raced due to btree write buffer, nothing to do */ + return bkey_s_c_null; + } - if (work < dev_work || work < unknown_dev) - work = U64_MAX; - work = min(work, capacity); + memset(data_opts, 0, sizeof(*data_opts)); - percent_full = div64_u64(work * 100, capacity); + data_opts->rewrite_ptrs = + bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression); + data_opts->target = r->target; - if (percent_full >= w->dev_most_full_percent) { - w->dev_most_full_idx = idx; - w->dev_most_full_percent = percent_full; - w->dev_most_full_work = work; - w->dev_most_full_capacity = capacity; + if (!data_opts->rewrite_ptrs) { + /* + * device we would want to write to offline? devices in target + * changed? + * + * We'll now need a full scan before this extent is picked up + * again: + */ + int ret = bch2_bkey_clear_needs_rebalance(trans, extent_iter, k); + if (ret) + return bkey_s_c_err(ret); + return bkey_s_c_null; } - if (w->total_work + dev_work >= w->total_work && - w->total_work + dev_work >= dev_work) - w->total_work += dev_work; + return k; } -static struct rebalance_work rebalance_work(struct bch_fs *c) +noinline_for_stack +static int do_rebalance_extent(struct moving_context *ctxt, + struct bpos work_pos, + struct btree_iter *extent_iter) { - struct bch_dev *ca; - struct rebalance_work ret = { .dev_most_full_idx = -1 }; - u64 unknown_dev = atomic64_read(&c->rebalance.work_unknown_dev); - unsigned i; - - for_each_online_member(ca, c, i) - rebalance_work_accumulate(&ret, - atomic64_read(&ca->rebalance_work), - unknown_dev, - bucket_to_sector(ca, ca->mi.nbuckets - - ca->mi.first_bucket), - i); - - rebalance_work_accumulate(&ret, - unknown_dev, 0, c->capacity, -1); + struct btree_trans *trans = ctxt->trans; + struct bch_fs *c = trans->c; + struct bch_fs_rebalance *r = &trans->c->rebalance; + struct data_update_opts data_opts; + struct bch_io_opts io_opts; + struct bkey_s_c k; + struct bkey_buf sk; + int ret; + + ctxt->stats = &r->work_stats; + r->state = BCH_REBALANCE_working; + + bch2_bkey_buf_init(&sk); + + ret = bkey_err(k = next_rebalance_extent(trans, work_pos, + extent_iter, &data_opts)); + if (ret || !k.k) + goto out; + ret = bch2_move_get_io_opts_one(trans, &io_opts, k); + if (ret) + goto out; + + atomic64_add(k.k->size, &ctxt->stats->sectors_seen); + + /* + * The iterator gets unlocked by __bch2_read_extent - need to + * save a copy of @k elsewhere: + */ + bch2_bkey_buf_reassemble(&sk, c, k); + k = bkey_i_to_s_c(sk.k); + + ret = bch2_move_extent(ctxt, NULL, extent_iter, k, io_opts, data_opts); + if (ret) { + if (bch2_err_matches(ret, ENOMEM)) { + /* memory allocation failure, wait for some IO to finish */ + bch2_move_ctxt_wait_for_io(ctxt); + ret = -BCH_ERR_transaction_restart_nested; + } + + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + goto out; + + /* skip it and continue, XXX signal failure */ + ret = 0; + } +out: + bch2_bkey_buf_exit(&sk, c); return ret; } -static void rebalance_work_reset(struct bch_fs *c) +static bool rebalance_pred(struct bch_fs *c, void *arg, + struct bkey_s_c k, + struct bch_io_opts *io_opts, + struct data_update_opts *data_opts) { - struct bch_dev *ca; - unsigned i; + unsigned target, compression; + + if (k.k->p.inode) { + target = io_opts->background_target; + compression = 
io_opts->background_compression ?: io_opts->compression; + } else { + const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); - for_each_online_member(ca, c, i) - atomic64_set(&ca->rebalance_work, 0); + target = r ? r->target : io_opts->background_target; + compression = r ? r->compression : + (io_opts->background_compression ?: io_opts->compression); + } - atomic64_set(&c->rebalance.work_unknown_dev, 0); + data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, k, target, compression); + data_opts->target = target; + return data_opts->rewrite_ptrs != 0; } -static unsigned long curr_cputime(void) +static int do_rebalance_scan(struct moving_context *ctxt, u64 inum, u64 cookie) { - u64 utime, stime; + struct btree_trans *trans = ctxt->trans; + struct bch_fs_rebalance *r = &trans->c->rebalance; + int ret; + + bch2_move_stats_init(&r->scan_stats, "rebalance_scan"); + ctxt->stats = &r->scan_stats; - task_cputime_adjusted(current, &utime, &stime); - return nsecs_to_jiffies(utime + stime); + if (!inum) { + r->scan_start = BBPOS_MIN; + r->scan_end = BBPOS_MAX; + } else { + r->scan_start = BBPOS(BTREE_ID_extents, POS(inum, 0)); + r->scan_end = BBPOS(BTREE_ID_extents, POS(inum, U64_MAX)); + } + + r->state = BCH_REBALANCE_scanning; + + ret = __bch2_move_data(ctxt, r->scan_start, r->scan_end, rebalance_pred, NULL) ?: + commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL, + bch2_clear_rebalance_needs_scan(trans, inum, cookie)); + + bch2_move_stats_exit(&r->scan_stats, trans->c); + return ret; } -static int bch2_rebalance_thread(void *arg) +static void rebalance_wait(struct bch_fs *c) { - struct bch_fs *c = arg; struct bch_fs_rebalance *r = &c->rebalance; struct io_clock *clock = &c->io_clock[WRITE]; - struct rebalance_work w, p; - struct bch_move_stats move_stats; - unsigned long start, prev_start; - unsigned long prev_run_time, prev_run_cputime; - unsigned long cputime, prev_cputime; - u64 io_start; - long throttle; + u64 now = atomic64_read(&clock->now); + u64 min_member_capacity = bch2_min_rw_member_capacity(c); - set_freezable(); + if (min_member_capacity == U64_MAX) + min_member_capacity = 128 * 2048; + + r->wait_iotime_end = now + (min_member_capacity >> 6); - io_start = atomic64_read(&clock->now); - p = rebalance_work(c); - prev_start = jiffies; - prev_cputime = curr_cputime(); + if (r->state != BCH_REBALANCE_waiting) { + r->wait_iotime_start = now; + r->wait_wallclock_start = ktime_get_real_ns(); + r->state = BCH_REBALANCE_waiting; + } + + bch2_kthread_io_clock_wait(clock, r->wait_iotime_end, MAX_SCHEDULE_TIMEOUT); +} - bch2_move_stats_init(&move_stats, "rebalance"); - while (!kthread_wait_freezable(r->enabled)) { - cond_resched(); +static int do_rebalance(struct moving_context *ctxt) +{ + struct btree_trans *trans = ctxt->trans; + struct bch_fs *c = trans->c; + struct bch_fs_rebalance *r = &c->rebalance; + struct btree_iter rebalance_work_iter, extent_iter = { NULL }; + struct bkey_s_c k; + int ret = 0; - start = jiffies; - cputime = curr_cputime(); + bch2_move_stats_init(&r->work_stats, "rebalance_work"); + bch2_move_stats_init(&r->scan_stats, "rebalance_scan"); - prev_run_time = start - prev_start; - prev_run_cputime = cputime - prev_cputime; + bch2_trans_iter_init(trans, &rebalance_work_iter, + BTREE_ID_rebalance_work, POS_MIN, + BTREE_ITER_ALL_SNAPSHOTS); - w = rebalance_work(c); - BUG_ON(!w.dev_most_full_capacity); + while (!bch2_move_ratelimit(ctxt) && + !kthread_wait_freezable(r->enabled)) { + bch2_trans_begin(trans); - if (!w.total_work) { - r->state = REBALANCE_WAITING; - 
kthread_wait_freezable(rebalance_work(c).total_work); + ret = bkey_err(k = next_rebalance_entry(trans, &rebalance_work_iter)); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; - } + if (ret || !k.k) + break; - /* - * If there isn't much work to do, throttle cpu usage: - */ - throttle = prev_run_cputime * 100 / - max(1U, w.dev_most_full_percent) - - prev_run_time; - - if (w.dev_most_full_percent < 20 && throttle > 0) { - r->throttled_until_iotime = io_start + - div_u64(w.dev_most_full_capacity * - (20 - w.dev_most_full_percent), - 50); - - if (atomic64_read(&clock->now) + clock->max_slop < - r->throttled_until_iotime) { - r->throttled_until_cputime = start + throttle; - r->state = REBALANCE_THROTTLED; - - bch2_kthread_io_clock_wait(clock, - r->throttled_until_iotime, - throttle); - continue; - } - } + ret = k.k->type == KEY_TYPE_cookie + ? do_rebalance_scan(ctxt, k.k->p.inode, + le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie)) + : do_rebalance_extent(ctxt, k.k->p, &extent_iter); - /* minimum 1 mb/sec: */ - r->pd.rate.rate = - max_t(u64, 1 << 11, - r->pd.rate.rate * - max(p.dev_most_full_percent, 1U) / - max(w.dev_most_full_percent, 1U)); - - io_start = atomic64_read(&clock->now); - p = w; - prev_start = start; - prev_cputime = cputime; - - r->state = REBALANCE_RUNNING; - memset(&move_stats, 0, sizeof(move_stats)); - rebalance_work_reset(c); - - bch2_move_data(c, - 0, POS_MIN, - BTREE_ID_NR, POS_MAX, - /* ratelimiting disabled for now */ - NULL, /* &r->pd.rate, */ - &move_stats, - writepoint_ptr(&c->rebalance_write_point), - true, - rebalance_pred, NULL); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + continue; + if (ret) + break; + + bch2_btree_iter_advance(&rebalance_work_iter); } - return 0; + bch2_trans_iter_exit(trans, &extent_iter); + bch2_trans_iter_exit(trans, &rebalance_work_iter); + bch2_move_stats_exit(&r->scan_stats, c); + + if (!ret && + !kthread_should_stop() && + !atomic64_read(&r->work_stats.sectors_seen) && + !atomic64_read(&r->scan_stats.sectors_seen)) { + bch2_trans_unlock_long(trans); + rebalance_wait(c); + } + + if (!bch2_err_matches(ret, EROFS)) + bch_err_fn(c, ret); + return ret; } -void bch2_rebalance_work_to_text(struct printbuf *out, struct bch_fs *c) +static int bch2_rebalance_thread(void *arg) { + struct bch_fs *c = arg; struct bch_fs_rebalance *r = &c->rebalance; - struct rebalance_work w = rebalance_work(c); + struct moving_context ctxt; + int ret; - if (!out->nr_tabstops) - printbuf_tabstop_push(out, 20); + set_freezable(); - prt_printf(out, "fullest_dev (%i):", w.dev_most_full_idx); - prt_tab(out); + bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats, + writepoint_ptr(&c->rebalance_write_point), + true); - prt_human_readable_u64(out, w.dev_most_full_work << 9); - prt_printf(out, "/"); - prt_human_readable_u64(out, w.dev_most_full_capacity << 9); - prt_newline(out); + while (!kthread_should_stop() && + !(ret = do_rebalance(&ctxt))) + ; - prt_printf(out, "total work:"); - prt_tab(out); + bch2_moving_ctxt_exit(&ctxt); - prt_human_readable_u64(out, w.total_work << 9); - prt_printf(out, "/"); - prt_human_readable_u64(out, c->capacity << 9); - prt_newline(out); + return 0; +} + +void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c) +{ + struct bch_fs_rebalance *r = &c->rebalance; - prt_printf(out, "rate:"); - prt_tab(out); - prt_printf(out, "%u", r->pd.rate.rate); + prt_str(out, bch2_rebalance_state_strs[r->state]); prt_newline(out); + printbuf_indent_add(out, 2); switch (r->state) { - case REBALANCE_WAITING: - 
prt_printf(out, "waiting"); + case BCH_REBALANCE_waiting: { + u64 now = atomic64_read(&c->io_clock[WRITE].now); + + prt_str(out, "io wait duration: "); + bch2_prt_human_readable_s64(out, r->wait_iotime_end - r->wait_iotime_start); + prt_newline(out); + + prt_str(out, "io wait remaining: "); + bch2_prt_human_readable_s64(out, r->wait_iotime_end - now); + prt_newline(out); + + prt_str(out, "duration waited: "); + bch2_pr_time_units(out, ktime_get_real_ns() - r->wait_wallclock_start); + prt_newline(out); break; - case REBALANCE_THROTTLED: - prt_printf(out, "throttled for %lu sec or ", - (r->throttled_until_cputime - jiffies) / HZ); - prt_human_readable_u64(out, - (r->throttled_until_iotime - - atomic64_read(&c->io_clock[WRITE].now)) << 9); - prt_printf(out, " io"); + } + case BCH_REBALANCE_working: + bch2_move_stats_to_text(out, &r->work_stats); break; - case REBALANCE_RUNNING: - prt_printf(out, "running"); + case BCH_REBALANCE_scanning: + bch2_move_stats_to_text(out, &r->scan_stats); break; } prt_newline(out); + printbuf_indent_sub(out, 2); } void bch2_rebalance_stop(struct bch_fs *c) @@ -361,6 +461,4 @@ int bch2_rebalance_start(struct bch_fs *c) void bch2_fs_rebalance_init(struct bch_fs *c) { bch2_pd_controller_init(&c->rebalance.pd); - - atomic64_set(&c->rebalance.work_unknown_dev, S64_MAX); } diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h index 7ade0bb81cce..28a52638f16c 100644 --- a/fs/bcachefs/rebalance.h +++ b/fs/bcachefs/rebalance.h @@ -4,6 +4,9 @@ #include "rebalance_types.h" +int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum); +int bch2_set_fs_needs_rebalance(struct bch_fs *); + static inline void rebalance_wakeup(struct bch_fs *c) { struct task_struct *p; @@ -15,11 +18,7 @@ static inline void rebalance_wakeup(struct bch_fs *c) rcu_read_unlock(); } -void bch2_rebalance_add_key(struct bch_fs *, struct bkey_s_c, - struct bch_io_opts *); -void bch2_rebalance_add_work(struct bch_fs *, u64); - -void bch2_rebalance_work_to_text(struct printbuf *, struct bch_fs *); +void bch2_rebalance_status_to_text(struct printbuf *, struct bch_fs *); void bch2_rebalance_stop(struct bch_fs *); int bch2_rebalance_start(struct bch_fs *); diff --git a/fs/bcachefs/rebalance_types.h b/fs/bcachefs/rebalance_types.h index 7462a92e9598..0fffb536c1d0 100644 --- a/fs/bcachefs/rebalance_types.h +++ b/fs/bcachefs/rebalance_types.h @@ -2,25 +2,36 @@ #ifndef _BCACHEFS_REBALANCE_TYPES_H #define _BCACHEFS_REBALANCE_TYPES_H +#include "bbpos_types.h" #include "move_types.h" -enum rebalance_state { - REBALANCE_WAITING, - REBALANCE_THROTTLED, - REBALANCE_RUNNING, +#define BCH_REBALANCE_STATES() \ + x(waiting) \ + x(working) \ + x(scanning) + +enum bch_rebalance_states { +#define x(t) BCH_REBALANCE_##t, + BCH_REBALANCE_STATES() +#undef x }; struct bch_fs_rebalance { - struct task_struct __rcu *thread; + struct task_struct __rcu *thread; struct bch_pd_controller pd; - atomic64_t work_unknown_dev; + enum bch_rebalance_states state; + u64 wait_iotime_start; + u64 wait_iotime_end; + u64 wait_wallclock_start; + + struct bch_move_stats work_stats; - enum rebalance_state state; - u64 throttled_until_iotime; - unsigned long throttled_until_cputime; + struct bbpos scan_start; + struct bbpos scan_end; + struct bch_move_stats scan_stats; - unsigned enabled:1; + unsigned enabled:1; }; #endif /* _BCACHEFS_REBALANCE_TYPES_H */ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 4cd660650e5b..9c30500ce920 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -23,6 +23,7 @@ #include 
"logged_ops.h" #include "move.h" #include "quota.h" +#include "rebalance.h" #include "recovery.h" #include "replicas.h" #include "sb-clean.h" @@ -182,7 +183,7 @@ static int bch2_journal_replay(struct bch_fs *c) bch2_journal_replay_key(trans, k)); if (ret) { bch_err(c, "journal replay: error while replaying key at btree %s level %u: %s", - bch2_btree_ids[k->btree_id], k->level, bch2_err_str(ret)); + bch2_btree_id_str(k->btree_id), k->level, bch2_err_str(ret)); goto err; } } @@ -225,7 +226,7 @@ static int journal_replay_entry_early(struct bch_fs *c, if (entry->u64s) { r->level = entry->level; - bkey_copy(&r->key, &entry->start[0]); + bkey_copy(&r->key, (struct bkey_i *) entry->start); r->error = 0; } else { r->error = -EIO; @@ -364,10 +365,12 @@ static int read_btree_roots(struct bch_fs *c) } if (r->error) { - __fsck_err(c, btree_id_is_alloc(i) + __fsck_err(c, + btree_id_is_alloc(i) ? FSCK_CAN_IGNORE : 0, + btree_root_bkey_invalid, "invalid btree root %s", - bch2_btree_ids[i]); + bch2_btree_id_str(i)); if (i == BTREE_ID_alloc) c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); } @@ -375,8 +378,9 @@ static int read_btree_roots(struct bch_fs *c) ret = bch2_btree_root_read(c, i, &r->key, r->level); if (ret) { fsck_err(c, + btree_root_read_error, "error reading btree root %s", - bch2_btree_ids[i]); + bch2_btree_id_str(i)); if (btree_id_is_alloc(i)) c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); ret = 0; @@ -713,6 +717,7 @@ int bch2_fs_recovery(struct bch_fs *c) if (mustfix_fsck_err_on(c->sb.clean && last_journal_entry && !journal_entry_empty(last_journal_entry), c, + clean_but_journal_not_empty, "filesystem marked clean but journal not empty")) { c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); SET_BCH_SB_CLEAN(c->disk_sb.sb, false); @@ -720,7 +725,9 @@ int bch2_fs_recovery(struct bch_fs *c) } if (!last_journal_entry) { - fsck_err_on(!c->sb.clean, c, "no journal entries found"); + fsck_err_on(!c->sb.clean, c, + dirty_but_no_journal_entries, + "no journal entries found"); if (clean) goto use_clean; @@ -728,6 +735,13 @@ int bch2_fs_recovery(struct bch_fs *c) if (*i) { last_journal_entry = &(*i)->j; (*i)->ignore = false; + /* + * This was probably a NO_FLUSH entry, + * so last_seq was garbage - but we know + * we're only using a single journal + * entry, set it here: + */ + (*i)->j.last_seq = (*i)->j.seq; break; } } @@ -901,7 +915,7 @@ out: } kfree(clean); - if (!ret && test_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags)) { + if (!ret && test_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags)) { bch2_fs_read_write_early(c); bch2_delete_dead_snapshots_async(c); } @@ -946,16 +960,12 @@ int bch2_fs_initialize(struct bch_fs *c) for (i = 0; i < BTREE_ID_NR; i++) bch2_btree_root_alloc(c, i); - for_each_online_member(ca, c, i) + for_each_member_device(ca, c, i) bch2_dev_usage_init(ca); - for_each_online_member(ca, c, i) { - ret = bch2_dev_journal_alloc(ca); - if (ret) { - percpu_ref_put(&ca->io_ref); - goto err; - } - } + ret = bch2_fs_journal_alloc(c); + if (ret) + goto err; /* * journal_res_get() will crash if called before this has @@ -973,15 +983,13 @@ int bch2_fs_initialize(struct bch_fs *c) * btree updates */ bch_verbose(c, "marking superblocks"); - for_each_member_device(ca, c, i) { - ret = bch2_trans_mark_dev_sb(c, ca); - if (ret) { - percpu_ref_put(&ca->ref); - goto err; - } + ret = bch2_trans_mark_dev_sbs(c); + bch_err_msg(c, ret, "marking superblocks"); + if (ret) + goto err; + for_each_online_member(ca, c, i) ca->new_fs_bucket_idx = 0; - } ret = bch2_fs_freespace_init(c); if (ret) diff --git 
a/fs/bcachefs/recovery_types.h b/fs/bcachefs/recovery_types.h index fbfa9d831d6f..515e3d62c2ac 100644 --- a/fs/bcachefs/recovery_types.h +++ b/fs/bcachefs/recovery_types.h @@ -14,6 +14,8 @@ x(snapshots_read, PASS_ALWAYS) \ x(check_topology, 0) \ x(check_allocations, PASS_FSCK) \ + x(trans_mark_dev_sbs, PASS_ALWAYS|PASS_SILENT) \ + x(fs_journal_alloc, PASS_ALWAYS|PASS_SILENT) \ x(set_may_go_rw, PASS_ALWAYS|PASS_SILENT) \ x(journal_replay, PASS_ALWAYS) \ x(check_alloc_info, PASS_FSCK) \ @@ -27,11 +29,12 @@ x(check_snapshot_trees, PASS_FSCK) \ x(check_snapshots, PASS_FSCK) \ x(check_subvols, PASS_FSCK) \ - x(delete_dead_snapshots, PASS_FSCK|PASS_UNCLEAN) \ + x(delete_dead_snapshots, PASS_FSCK) \ x(fs_upgrade_for_subvolumes, 0) \ x(resume_logged_ops, PASS_ALWAYS) \ x(check_inodes, PASS_FSCK) \ x(check_extents, PASS_FSCK) \ + x(check_indirect_extents, PASS_FSCK) \ x(check_dirents, PASS_FSCK) \ x(check_xattrs, PASS_FSCK) \ x(check_root, PASS_FSCK) \ @@ -39,6 +42,7 @@ x(check_nlinks, PASS_FSCK) \ x(delete_dead_inodes, PASS_FSCK|PASS_UNCLEAN) \ x(fix_reflink_p, 0) \ + x(set_fs_needs_rebalance, 0) \ enum bch_recovery_pass { #define x(n, when) BCH_RECOVERY_PASS_##n, diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index d77d0ea9afff..6e1bfe9feb59 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -7,6 +7,7 @@ #include "inode.h" #include "io_misc.h" #include "io_write.h" +#include "rebalance.h" #include "reflink.h" #include "subvolume.h" #include "super-io.h" @@ -27,7 +28,7 @@ static inline unsigned bkey_type_to_indirect(const struct bkey *k) /* reflink pointers */ -int bch2_reflink_p_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_reflink_p_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { @@ -74,7 +75,7 @@ bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r /* indirect extents */ -int bch2_reflink_v_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_reflink_v_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { @@ -103,28 +104,29 @@ bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r } #endif +static inline void check_indirect_extent_deleting(struct bkey_i *new, unsigned *flags) +{ + if ((*flags & BTREE_TRIGGER_INSERT) && !*bkey_refcount(new)) { + new->k.type = KEY_TYPE_deleted; + new->k.size = 0; + set_bkey_val_u64s(&new->k, 0); + *flags &= ~BTREE_TRIGGER_INSERT; + } +} + int bch2_trans_mark_reflink_v(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c old, struct bkey_i *new, unsigned flags) { - if (!(flags & BTREE_TRIGGER_OVERWRITE)) { - struct bkey_i_reflink_v *r = bkey_i_to_reflink_v(new); - - if (!r->v.refcount) { - r->k.type = KEY_TYPE_deleted; - r->k.size = 0; - set_bkey_val_u64s(&r->k, 0); - return 0; - } - } + check_indirect_extent_deleting(new, &flags); return bch2_trans_mark_extent(trans, btree_id, level, old, new, flags); } /* indirect inline data */ -int bch2_indirect_inline_data_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_indirect_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { @@ -132,7 +134,7 @@ int bch2_indirect_inline_data_invalid(const struct bch_fs *c, struct bkey_s_c k, } void bch2_indirect_inline_data_to_text(struct printbuf *out, - struct bch_fs *c, struct bkey_s_c k) + struct bch_fs *c, struct bkey_s_c k) { struct bkey_s_c_indirect_inline_data d =
bkey_s_c_to_indirect_inline_data(k); unsigned datalen = bkey_inline_data_bytes(k.k); @@ -147,16 +149,7 @@ int bch2_trans_mark_indirect_inline_data(struct btree_trans *trans, struct bkey_s_c old, struct bkey_i *new, unsigned flags) { - if (!(flags & BTREE_TRIGGER_OVERWRITE)) { - struct bkey_i_indirect_inline_data *r = - bkey_i_to_indirect_inline_data(new); - - if (!r->v.refcount) { - r->k.type = KEY_TYPE_deleted; - r->k.size = 0; - set_bkey_val_u64s(&r->k, 0); - } - } + check_indirect_extent_deleting(new, &flags); return 0; } @@ -260,8 +253,9 @@ s64 bch2_remap_range(struct bch_fs *c, struct bpos dst_start = POS(dst_inum.inum, dst_offset); struct bpos src_start = POS(src_inum.inum, src_offset); struct bpos dst_end = dst_start, src_end = src_start; + struct bch_io_opts opts; struct bpos src_want; - u64 dst_done; + u64 dst_done = 0; u32 dst_snapshot, src_snapshot; int ret = 0, ret2 = 0; @@ -277,6 +271,10 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_bkey_buf_init(&new_src); trans = bch2_trans_get(c); + ret = bch2_inum_opts_get(trans, src_inum, &opts); + if (ret) + goto err; + bch2_trans_iter_init(trans, &src_iter, BTREE_ID_extents, src_start, BTREE_ITER_INTENT); bch2_trans_iter_init(trans, &dst_iter, BTREE_ID_extents, dst_start, @@ -360,10 +358,13 @@ s64 bch2_remap_range(struct bch_fs *c, min(src_k.k->p.offset - src_want.offset, dst_end.offset - dst_iter.pos.offset)); - ret = bch2_extent_update(trans, dst_inum, &dst_iter, - new_dst.k, &disk_res, - new_i_size, i_sectors_delta, - true); + ret = bch2_bkey_set_needs_rebalance(c, new_dst.k, + opts.background_target, + opts.background_compression) ?: + bch2_extent_update(trans, dst_inum, &dst_iter, + new_dst.k, &disk_res, + new_i_size, i_sectors_delta, + true); bch2_disk_reservation_put(c, &disk_res); } bch2_trans_iter_exit(trans, &dst_iter); @@ -394,7 +395,7 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_trans_iter_exit(trans, &inode_iter); } while (bch2_err_matches(ret2, BCH_ERR_transaction_restart)); - +err: bch2_trans_put(trans); bch2_bkey_buf_exit(&new_src, c); bch2_bkey_buf_exit(&new_dst, c); diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h index fe52538efb52..8ccf3f9c4939 100644 --- a/fs/bcachefs/reflink.h +++ b/fs/bcachefs/reflink.h @@ -4,7 +4,7 @@ enum bkey_invalid_flags; -int bch2_reflink_p_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_reflink_p_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -19,7 +19,7 @@ bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); .min_val_size = 16, \ }) -int bch2_reflink_v_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_reflink_v_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -35,7 +35,7 @@ int bch2_trans_mark_reflink_v(struct btree_trans *, enum btree_id, unsigned, .min_val_size = 8, \ }) -int bch2_indirect_inline_data_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_indirect_inline_data_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_indirect_inline_data_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index cef2a0447b86..1c3ae13bfced 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -462,18 +462,13 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret) { 
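+	/* locks are now taken even when the caller passed an error: ret just short-circuits the ?: chain below, so cleanup always runs */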
lockdep_assert_held(&c->replicas_gc_lock); - if (ret) - goto err; - mutex_lock(&c->sb_lock); percpu_down_write(&c->mark_lock); - ret = bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc); - if (ret) - goto err; + ret = ret ?: + bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc) ?: + replicas_table_update(c, &c->replicas_gc); - ret = replicas_table_update(c, &c->replicas_gc); -err: kfree(c->replicas_gc.entries); c->replicas_gc.entries = NULL; @@ -579,12 +574,9 @@ retry: bch2_cpu_replicas_sort(&new); - ret = bch2_cpu_replicas_to_sb_replicas(c, &new); - if (ret) - goto err; + ret = bch2_cpu_replicas_to_sb_replicas(c, &new) ?: + replicas_table_update(c, &new); - ret = replicas_table_update(c, &new); -err: kfree(new.entries); percpu_up_write(&c->mark_lock); diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c index 61203d7c8d36..e151ada1c8bd 100644 --- a/fs/bcachefs/sb-clean.c +++ b/fs/bcachefs/sb-clean.c @@ -82,6 +82,7 @@ int bch2_verify_superblock_clean(struct bch_fs *c, int ret = 0; if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c, + sb_clean_journal_seq_mismatch, "superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown", le64_to_cpu(clean->journal_seq), le64_to_cpu(j->seq))) { @@ -119,6 +120,7 @@ int bch2_verify_superblock_clean(struct bch_fs *c, k1->k.u64s != k2->k.u64s || memcmp(k1, k2, bkey_bytes(&k1->k)) || l1 != l2, c, + sb_clean_btree_root_mismatch, "superblock btree root %u doesn't match journal after clean shutdown\n" "sb: l=%u %s\n" "journal: l=%u %s\n", i, @@ -140,6 +142,7 @@ struct bch_sb_field_clean *bch2_read_superblock_clean(struct bch_fs *c) sb_clean = bch2_sb_field_get(c->disk_sb.sb, clean); if (fsck_err_on(!sb_clean, c, + sb_clean_missing, "superblock marked clean but clean section not present")) { SET_BCH_SB_CLEAN(c->disk_sb.sb, false); c->sb.clean = false; @@ -373,7 +376,7 @@ void bch2_fs_mark_clean(struct bch_fs *c) entry = sb_clean->start; bch2_journal_super_entries_add_common(c, &entry, 0); - entry = bch2_btree_roots_to_journal_entries(c, entry, entry); + entry = bch2_btree_roots_to_journal_entries(c, entry, 0); BUG_ON((void *) entry > vstruct_end(&sb_clean->field)); memset(entry, 0, diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c new file mode 100644 index 000000000000..f0930ab7f036 --- /dev/null +++ b/fs/bcachefs/sb-errors.c @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" +#include "sb-errors.h" +#include "super-io.h" + +static const char * const bch2_sb_error_strs[] = { +#define x(t, n, ...) [n] = #t, + BCH_SB_ERRS() + NULL +}; + +static void bch2_sb_error_id_to_text(struct printbuf *out, enum bch_sb_error_id id) +{ + if (id < BCH_SB_ERR_MAX) + prt_str(out, bch2_sb_error_strs[id]); + else + prt_printf(out, "(unknown error %u)", id); +} + +static inline unsigned bch2_sb_field_errors_nr_entries(struct bch_sb_field_errors *e) +{ + return e + ? 
(bch2_sb_field_bytes(&e->field) - sizeof(*e)) / sizeof(e->entries[0]) + : 0; +} + +static inline unsigned bch2_sb_field_errors_u64s(unsigned nr) +{ + return (sizeof(struct bch_sb_field_errors) + + sizeof(struct bch_sb_field_error_entry) * nr) / sizeof(u64); +} + +static int bch2_sb_errors_validate(struct bch_sb *sb, struct bch_sb_field *f, + struct printbuf *err) +{ + struct bch_sb_field_errors *e = field_to_type(f, errors); + unsigned i, nr = bch2_sb_field_errors_nr_entries(e); + + for (i = 0; i < nr; i++) { + if (!BCH_SB_ERROR_ENTRY_NR(&e->entries[i])) { + prt_printf(err, "entry with count 0 (id "); + bch2_sb_error_id_to_text(err, BCH_SB_ERROR_ENTRY_ID(&e->entries[i])); + prt_printf(err, ")"); + return -BCH_ERR_invalid_sb_errors; + } + + if (i + 1 < nr && + BCH_SB_ERROR_ENTRY_ID(&e->entries[i]) >= + BCH_SB_ERROR_ENTRY_ID(&e->entries[i + 1])) { + prt_printf(err, "entries out of order"); + return -BCH_ERR_invalid_sb_errors; + } + } + + return 0; +} + +static void bch2_sb_errors_to_text(struct printbuf *out, struct bch_sb *sb, + struct bch_sb_field *f) +{ + struct bch_sb_field_errors *e = field_to_type(f, errors); + unsigned i, nr = bch2_sb_field_errors_nr_entries(e); + + if (out->nr_tabstops <= 1) + printbuf_tabstop_push(out, 16); + + for (i = 0; i < nr; i++) { + bch2_sb_error_id_to_text(out, BCH_SB_ERROR_ENTRY_ID(&e->entries[i])); + prt_tab(out); + prt_u64(out, BCH_SB_ERROR_ENTRY_NR(&e->entries[i])); + prt_tab(out); + bch2_prt_datetime(out, le64_to_cpu(e->entries[i].last_error_time)); + prt_newline(out); + } +} + +const struct bch_sb_field_ops bch_sb_field_ops_errors = { + .validate = bch2_sb_errors_validate, + .to_text = bch2_sb_errors_to_text, +}; + +void bch2_sb_error_count(struct bch_fs *c, enum bch_sb_error_id err) +{ + bch_sb_errors_cpu *e = &c->fsck_error_counts; + struct bch_sb_error_entry_cpu n = { + .id = err, + .nr = 1, + .last_error_time = ktime_get_real_seconds() + }; + unsigned i; + + mutex_lock(&c->fsck_error_counts_lock); + for (i = 0; i < e->nr; i++) { + if (err == e->data[i].id) { + e->data[i].nr++; + e->data[i].last_error_time = n.last_error_time; + goto out; + } + if (err < e->data[i].id) + break; + } + + if (darray_make_room(e, 1)) + goto out; + + darray_insert_item(e, i, n); +out: + mutex_unlock(&c->fsck_error_counts_lock); +} + +void bch2_sb_errors_from_cpu(struct bch_fs *c) +{ + bch_sb_errors_cpu *src = &c->fsck_error_counts; + struct bch_sb_field_errors *dst = + bch2_sb_field_resize(&c->disk_sb, errors, + bch2_sb_field_errors_u64s(src->nr)); + unsigned i; + + if (!dst) + return; + + for (i = 0; i < src->nr; i++) { + SET_BCH_SB_ERROR_ENTRY_ID(&dst->entries[i], src->data[i].id); + SET_BCH_SB_ERROR_ENTRY_NR(&dst->entries[i], src->data[i].nr); + dst->entries[i].last_error_time = cpu_to_le64(src->data[i].last_error_time); + } +} + +static int bch2_sb_errors_to_cpu(struct bch_fs *c) +{ + struct bch_sb_field_errors *src = bch2_sb_field_get(c->disk_sb.sb, errors); + bch_sb_errors_cpu *dst = &c->fsck_error_counts; + unsigned i, nr = bch2_sb_field_errors_nr_entries(src); + int ret; + + if (!nr) + return 0; + + mutex_lock(&c->fsck_error_counts_lock); + ret = darray_make_room(dst, nr); + if (ret) + goto err; + + dst->nr = nr; + + for (i = 0; i < nr; i++) { + dst->data[i].id = BCH_SB_ERROR_ENTRY_ID(&src->entries[i]); + dst->data[i].nr = BCH_SB_ERROR_ENTRY_NR(&src->entries[i]); + dst->data[i].last_error_time = le64_to_cpu(src->entries[i].last_error_time); + } +err: + mutex_unlock(&c->fsck_error_counts_lock); + + return ret; +} + +void bch2_fs_sb_errors_exit(struct bch_fs *c) +{ 
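+	/* frees only the in-memory counts; the errors section in the superblock itself is left intact */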
+	darray_exit(&c->fsck_error_counts); +} + +void bch2_fs_sb_errors_init_early(struct bch_fs *c) +{ + mutex_init(&c->fsck_error_counts_lock); + darray_init(&c->fsck_error_counts); +} + +int bch2_fs_sb_errors_init(struct bch_fs *c) +{ + return bch2_sb_errors_to_cpu(c); +} diff --git a/fs/bcachefs/sb-errors.h b/fs/bcachefs/sb-errors.h new file mode 100644 index 000000000000..5a09a53966be --- /dev/null +++ b/fs/bcachefs/sb-errors.h @@ -0,0 +1,270 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_SB_ERRORS_H +#define _BCACHEFS_SB_ERRORS_H + +#include "sb-errors_types.h" + +#define BCH_SB_ERRS() \ + x(clean_but_journal_not_empty, 0) \ + x(dirty_but_no_journal_entries, 1) \ + x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \ + x(sb_clean_journal_seq_mismatch, 3) \ + x(sb_clean_btree_root_mismatch, 4) \ + x(sb_clean_missing, 5) \ + x(jset_unsupported_version, 6) \ + x(jset_unknown_csum, 7) \ + x(jset_last_seq_newer_than_seq, 8) \ + x(jset_past_bucket_end, 9) \ + x(jset_seq_blacklisted, 10) \ + x(journal_entries_missing, 11) \ + x(journal_entry_replicas_not_marked, 12) \ + x(journal_entry_past_jset_end, 13) \ + x(journal_entry_replicas_data_mismatch, 14) \ + x(journal_entry_bkey_u64s_0, 15) \ + x(journal_entry_bkey_past_end, 16) \ + x(journal_entry_bkey_bad_format, 17) \ + x(journal_entry_bkey_invalid, 18) \ + x(journal_entry_btree_root_bad_size, 19) \ + x(journal_entry_blacklist_bad_size, 20) \ + x(journal_entry_blacklist_v2_bad_size, 21) \ + x(journal_entry_blacklist_v2_start_past_end, 22) \ + x(journal_entry_usage_bad_size, 23) \ + x(journal_entry_data_usage_bad_size, 24) \ + x(journal_entry_clock_bad_size, 25) \ + x(journal_entry_clock_bad_rw, 26) \ + x(journal_entry_dev_usage_bad_size, 27) \ + x(journal_entry_dev_usage_bad_dev, 28) \ + x(journal_entry_dev_usage_bad_pad, 29) \ + x(btree_node_unreadable, 30) \ + x(btree_node_fault_injected, 31) \ + x(btree_node_bad_magic, 32) \ + x(btree_node_bad_seq, 33) \ + x(btree_node_unsupported_version, 34) \ + x(btree_node_bset_older_than_sb_min, 35) \ + x(btree_node_bset_newer_than_sb, 36) \ + x(btree_node_data_missing, 37) \ + x(btree_node_bset_after_end, 38) \ + x(btree_node_replicas_sectors_written_mismatch, 39) \ + x(btree_node_replicas_data_mismatch, 40) \ + x(bset_unknown_csum, 41) \ + x(bset_bad_csum, 42) \ + x(bset_past_end_of_btree_node, 43) \ + x(bset_wrong_sector_offset, 44) \ + x(bset_empty, 45) \ + x(bset_bad_seq, 46) \ + x(bset_blacklisted_journal_seq, 47) \ + x(first_bset_blacklisted_journal_seq, 48) \ + x(btree_node_bad_btree, 49) \ + x(btree_node_bad_level, 50) \ + x(btree_node_bad_min_key, 51) \ + x(btree_node_bad_max_key, 52) \ + x(btree_node_bad_format, 53) \ + x(btree_node_bkey_past_bset_end, 54) \ + x(btree_node_bkey_bad_format, 55) \ + x(btree_node_bad_bkey, 56) \ + x(btree_node_bkey_out_of_order, 57) \ + x(btree_root_bkey_invalid, 58) \ + x(btree_root_read_error, 59) \ + x(btree_root_bad_min_key, 60) \ + x(btree_root_bad_max_key, 61) \ + x(btree_node_read_error, 62) \ + x(btree_node_topology_bad_min_key, 63) \ + x(btree_node_topology_bad_max_key, 64) \ + x(btree_node_topology_overwritten_by_prev_node, 65) \ + x(btree_node_topology_overwritten_by_next_node, 66) \ + x(btree_node_topology_interior_node_empty, 67) \ + x(fs_usage_hidden_wrong, 68) \ + x(fs_usage_btree_wrong, 69) \ + x(fs_usage_data_wrong, 70) \ + x(fs_usage_cached_wrong, 71) \ + x(fs_usage_reserved_wrong, 72) \ + x(fs_usage_persistent_reserved_wrong, 73) \ + x(fs_usage_nr_inodes_wrong, 74) \ + x(fs_usage_replicas_wrong, 75) \ + 
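/* per-device usage counters, checked against in-memory accounting: */ \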
x(dev_usage_buckets_wrong, 76) \ + x(dev_usage_sectors_wrong, 77) \ + x(dev_usage_fragmented_wrong, 78) \ + x(dev_usage_buckets_ec_wrong, 79) \ + x(bkey_version_in_future, 80) \ + x(bkey_u64s_too_small, 81) \ + x(bkey_invalid_type_for_btree, 82) \ + x(bkey_extent_size_zero, 83) \ + x(bkey_extent_size_greater_than_offset, 84) \ + x(bkey_size_nonzero, 85) \ + x(bkey_snapshot_nonzero, 86) \ + x(bkey_snapshot_zero, 87) \ + x(bkey_at_pos_max, 88) \ + x(bkey_before_start_of_btree_node, 89) \ + x(bkey_after_end_of_btree_node, 90) \ + x(bkey_val_size_nonzero, 91) \ + x(bkey_val_size_too_small, 92) \ + x(alloc_v1_val_size_bad, 93) \ + x(alloc_v2_unpack_error, 94) \ + x(alloc_v3_unpack_error, 95) \ + x(alloc_v4_val_size_bad, 96) \ + x(alloc_v4_backpointers_start_bad, 97) \ + x(alloc_key_data_type_bad, 98) \ + x(alloc_key_empty_but_have_data, 99) \ + x(alloc_key_dirty_sectors_0, 100) \ + x(alloc_key_data_type_inconsistency, 101) \ + x(alloc_key_to_missing_dev_bucket, 102) \ + x(alloc_key_cached_inconsistency, 103) \ + x(alloc_key_cached_but_read_time_zero, 104) \ + x(alloc_key_to_missing_lru_entry, 105) \ + x(alloc_key_data_type_wrong, 106) \ + x(alloc_key_gen_wrong, 107) \ + x(alloc_key_dirty_sectors_wrong, 108) \ + x(alloc_key_cached_sectors_wrong, 109) \ + x(alloc_key_stripe_wrong, 110) \ + x(alloc_key_stripe_redundancy_wrong, 111) \ + x(bucket_sector_count_overflow, 112) \ + x(bucket_metadata_type_mismatch, 113) \ + x(need_discard_key_wrong, 114) \ + x(freespace_key_wrong, 115) \ + x(freespace_hole_missing, 116) \ + x(bucket_gens_val_size_bad, 117) \ + x(bucket_gens_key_wrong, 118) \ + x(bucket_gens_hole_wrong, 119) \ + x(bucket_gens_to_invalid_dev, 120) \ + x(bucket_gens_to_invalid_buckets, 121) \ + x(bucket_gens_nonzero_for_invalid_buckets, 122) \ + x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \ + x(need_discard_freespace_key_bad, 124) \ + x(backpointer_pos_wrong, 125) \ + x(backpointer_to_missing_device, 126) \ + x(backpointer_to_missing_alloc, 127) \ + x(backpointer_to_missing_ptr, 128) \ + x(lru_entry_at_time_0, 129) \ + x(lru_entry_to_invalid_bucket, 130) \ + x(lru_entry_bad, 131) \ + x(btree_ptr_val_too_big, 132) \ + x(btree_ptr_v2_val_too_big, 133) \ + x(btree_ptr_has_non_ptr, 134) \ + x(extent_ptrs_invalid_entry, 135) \ + x(extent_ptrs_no_ptrs, 136) \ + x(extent_ptrs_too_many_ptrs, 137) \ + x(extent_ptrs_redundant_crc, 138) \ + x(extent_ptrs_redundant_stripe, 139) \ + x(extent_ptrs_unwritten, 140) \ + x(extent_ptrs_written_and_unwritten, 141) \ + x(ptr_to_invalid_device, 142) \ + x(ptr_to_duplicate_device, 143) \ + x(ptr_after_last_bucket, 144) \ + x(ptr_before_first_bucket, 145) \ + x(ptr_spans_multiple_buckets, 146) \ + x(ptr_to_missing_backpointer, 147) \ + x(ptr_to_missing_alloc_key, 148) \ + x(ptr_to_missing_replicas_entry, 149) \ + x(ptr_to_missing_stripe, 150) \ + x(ptr_to_incorrect_stripe, 151) \ + x(ptr_gen_newer_than_bucket_gen, 152) \ + x(ptr_too_stale, 153) \ + x(stale_dirty_ptr, 154) \ + x(ptr_bucket_data_type_mismatch, 155) \ + x(ptr_cached_and_erasure_coded, 156) \ + x(ptr_crc_uncompressed_size_too_small, 157) \ + x(ptr_crc_csum_type_unknown, 158) \ + x(ptr_crc_compression_type_unknown, 159) \ + x(ptr_crc_redundant, 160) \ + x(ptr_crc_uncompressed_size_too_big, 161) \ + x(ptr_crc_nonce_mismatch, 162) \ + x(ptr_stripe_redundant, 163) \ + x(reservation_key_nr_replicas_invalid, 164) \ + x(reflink_v_refcount_wrong, 165) \ + x(reflink_p_to_missing_reflink_v, 166) \ + x(stripe_pos_bad, 167) \ + x(stripe_val_size_bad, 168) \ + x(stripe_sector_count_wrong, 169) \ + 
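/* snapshot tree, snapshot and subvolume topology checks: */ \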
x(snapshot_tree_pos_bad, 170) \ + x(snapshot_tree_to_missing_snapshot, 171) \ + x(snapshot_tree_to_missing_subvol, 172) \ + x(snapshot_tree_to_wrong_subvol, 173) \ + x(snapshot_tree_to_snapshot_subvol, 174) \ + x(snapshot_pos_bad, 175) \ + x(snapshot_parent_bad, 176) \ + x(snapshot_children_not_normalized, 177) \ + x(snapshot_child_duplicate, 178) \ + x(snapshot_child_bad, 179) \ + x(snapshot_skiplist_not_normalized, 180) \ + x(snapshot_skiplist_bad, 181) \ + x(snapshot_should_not_have_subvol, 182) \ + x(snapshot_to_bad_snapshot_tree, 183) \ + x(snapshot_bad_depth, 184) \ + x(snapshot_bad_skiplist, 185) \ + x(subvol_pos_bad, 186) \ + x(subvol_not_master_and_not_snapshot, 187) \ + x(subvol_to_missing_root, 188) \ + x(subvol_root_wrong_bi_subvol, 189) \ + x(bkey_in_missing_snapshot, 190) \ + x(inode_pos_inode_nonzero, 191) \ + x(inode_pos_blockdev_range, 192) \ + x(inode_unpack_error, 193) \ + x(inode_str_hash_invalid, 194) \ + x(inode_v3_fields_start_bad, 195) \ + x(inode_snapshot_mismatch, 196) \ + x(inode_unlinked_but_clean, 197) \ + x(inode_unlinked_but_nlink_nonzero, 198) \ + x(inode_checksum_type_invalid, 199) \ + x(inode_compression_type_invalid, 200) \ + x(inode_subvol_root_but_not_dir, 201) \ + x(inode_i_size_dirty_but_clean, 202) \ + x(inode_i_sectors_dirty_but_clean, 203) \ + x(inode_i_sectors_wrong, 204) \ + x(inode_dir_wrong_nlink, 205) \ + x(inode_dir_multiple_links, 206) \ + x(inode_multiple_links_but_nlink_0, 207) \ + x(inode_wrong_backpointer, 208) \ + x(inode_wrong_nlink, 209) \ + x(inode_unreachable, 210) \ + x(deleted_inode_but_clean, 211) \ + x(deleted_inode_missing, 212) \ + x(deleted_inode_is_dir, 213) \ + x(deleted_inode_not_unlinked, 214) \ + x(extent_overlapping, 215) \ + x(extent_in_missing_inode, 216) \ + x(extent_in_non_reg_inode, 217) \ + x(extent_past_end_of_inode, 218) \ + x(dirent_empty_name, 219) \ + x(dirent_val_too_big, 220) \ + x(dirent_name_too_long, 221) \ + x(dirent_name_embedded_nul, 222) \ + x(dirent_name_dot_or_dotdot, 223) \ + x(dirent_name_has_slash, 224) \ + x(dirent_d_type_wrong, 225) \ + x(dirent_d_parent_subvol_wrong, 226) \ + x(dirent_in_missing_dir_inode, 227) \ + x(dirent_in_non_dir_inode, 228) \ + x(dirent_to_missing_inode, 229) \ + x(dirent_to_missing_subvol, 230) \ + x(dirent_to_itself, 231) \ + x(quota_type_invalid, 232) \ + x(xattr_val_size_too_small, 233) \ + x(xattr_val_size_too_big, 234) \ + x(xattr_invalid_type, 235) \ + x(xattr_name_invalid_chars, 236) \ + x(xattr_in_missing_inode, 237) \ + x(root_subvol_missing, 238) \ + x(root_dir_missing, 239) \ + x(root_inode_not_dir, 240) \ + x(dir_loop, 241) \ + x(hash_table_key_duplicate, 242) \ + x(hash_table_key_wrong_offset, 243) + +enum bch_sb_error_id { +#define x(t, n) BCH_FSCK_ERR_##t = n, + BCH_SB_ERRS() +#undef x + BCH_SB_ERR_MAX +}; + +extern const struct bch_sb_field_ops bch_sb_field_ops_errors; + +void bch2_sb_error_count(struct bch_fs *, enum bch_sb_error_id); + +void bch2_sb_errors_from_cpu(struct bch_fs *); + +void bch2_fs_sb_errors_exit(struct bch_fs *); +void bch2_fs_sb_errors_init_early(struct bch_fs *); +int bch2_fs_sb_errors_init(struct bch_fs *); + +#endif /* _BCACHEFS_SB_ERRORS_H */ diff --git a/fs/bcachefs/sb-errors_types.h b/fs/bcachefs/sb-errors_types.h new file mode 100644 index 000000000000..b1c099843a39 --- /dev/null +++ b/fs/bcachefs/sb-errors_types.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_SB_ERRORS_TYPES_H +#define _BCACHEFS_SB_ERRORS_TYPES_H + +#include "darray.h" + +struct bch_sb_error_entry_cpu { + u64 id:16, + nr:48; + 
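/* realtime seconds of the most recent occurrence, from ktime_get_real_seconds() */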
u64 last_error_time; +}; + +typedef DARRAY(struct bch_sb_error_entry_cpu) bch_sb_errors_cpu; + +#endif /* _BCACHEFS_SB_ERRORS_TYPES_H */ + diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 6dd85bb996fe..bed0f857fe5b 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -7,21 +7,28 @@ #include "sb-members.h" #include "super-io.h" -/* Code for bch_sb_field_members_v1: */ +#define x(t, n, ...) [n] = #t, +static const char * const bch2_iops_measurements[] = { + BCH_IOPS_MEASUREMENTS() + NULL +}; -static struct bch_member *members_v2_get_mut(struct bch_sb_field_members_v2 *mi, int i) -{ - return (void *) mi->_members + (i * le16_to_cpu(mi->member_bytes)); -} +char * const bch2_member_error_strs[] = { + BCH_MEMBER_ERROR_TYPES() + NULL +}; +#undef x + +/* Code for bch_sb_field_members_v1: */ struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i) { - return members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i); + return __bch2_members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i); } static struct bch_member members_v2_get(struct bch_sb_field_members_v2 *mi, int i) { - struct bch_member ret, *p = members_v2_get_mut(mi, i); + struct bch_member ret, *p = __bch2_members_v2_get_mut(mi, i); memset(&ret, 0, sizeof(ret)); memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret))); return ret; @@ -36,7 +43,8 @@ static struct bch_member members_v1_get(struct bch_sb_field_members_v1 *mi, int { struct bch_member ret, *p = members_v1_get_mut(mi, i); memset(&ret, 0, sizeof(ret)); - memcpy(&ret, p, min_t(size_t, sizeof(struct bch_member), sizeof(ret))); return ret; + memcpy(&ret, p, min_t(size_t, BCH_MEMBER_V1_BYTES, sizeof(ret))); + return ret; } struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i) @@ -62,7 +70,7 @@ static int sb_members_v2_resize_entries(struct bch_fs *c) for (int i = c->disk_sb.sb->nr_devices - 1; i >= 0; --i) { void *dst = (void *) mi->_members + (i * sizeof(struct bch_member)); - memmove(dst, members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes)); + memmove(dst, __bch2_members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes)); memset(dst + le16_to_cpu(mi->member_bytes), 0, (sizeof(struct bch_member) - le16_to_cpu(mi->member_bytes))); } @@ -71,7 +79,7 @@ static int sb_members_v2_resize_entries(struct bch_fs *c) return 0; } -int bch2_members_v2_init(struct bch_fs *c) +int bch2_sb_members_v2_init(struct bch_fs *c) { struct bch_sb_field_members_v1 *mi1; struct bch_sb_field_members_v2 *mi2; @@ -91,7 +99,7 @@ int bch2_members_v2_init(struct bch_fs *c) return sb_members_v2_resize_entries(c); } -int bch_members_cpy_v2_v1(struct bch_sb_handle *disk_sb) +int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb) { struct bch_sb_field_members_v1 *mi1; struct bch_sb_field_members_v2 *mi2; @@ -105,7 +113,7 @@ int bch_members_cpy_v2_v1(struct bch_sb_handle *disk_sb) mi2 = bch2_sb_field_get(disk_sb->sb, members_v2); for (unsigned i = 0; i < disk_sb->sb->nr_devices; i++) - memcpy(members_v1_get_mut(mi1, i), members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES); + memcpy(members_v1_get_mut(mi1, i), __bch2_members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES); return 0; } @@ -155,6 +163,8 @@ static void member_to_text(struct printbuf *out, u64 bucket_size = le16_to_cpu(m.bucket_size); u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size; + if (!bch2_member_exists(&m)) + return; prt_printf(out, "Device:"); prt_tab(out); @@ -163,6 +173,21 @@ static void member_to_text(struct printbuf *out, printbuf_indent_add(out, 2); + 
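/* label resolves through the disk_groups section via BCH_MEMBER_GROUP() */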
prt_printf(out, "Label:"); + prt_tab(out); + if (BCH_MEMBER_GROUP(&m)) { + unsigned idx = BCH_MEMBER_GROUP(&m) - 1; + + if (idx < disk_groups_nr(gi)) + prt_printf(out, "%s (%u)", + gi->entries[idx].label, idx); + else + prt_printf(out, "(bad disk labels section)"); + } else { + prt_printf(out, "(none)"); + } + prt_newline(out); + prt_printf(out, "UUID:"); prt_tab(out); pr_uuid(out, m.uuid.b); @@ -173,6 +198,13 @@ static void member_to_text(struct printbuf *out, prt_units_u64(out, device_size << 9); prt_newline(out); + for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) { + prt_printf(out, "%s errors:", bch2_member_error_strs[i]); + prt_tab(out); + prt_u64(out, le64_to_cpu(m.errors[i])); + prt_newline(out); + } + for (unsigned i = 0; i < BCH_IOPS_NR; i++) { prt_printf(out, "%s iops:", bch2_iops_measurements[i]); prt_tab(out); @@ -198,7 +230,7 @@ static void member_to_text(struct printbuf *out, prt_printf(out, "Last mount:"); prt_tab(out); if (m.last_mount) - pr_time(out, le64_to_cpu(m.last_mount)); + bch2_prt_datetime(out, le64_to_cpu(m.last_mount)); else prt_printf(out, "(never)"); prt_newline(out); @@ -211,21 +243,6 @@ static void member_to_text(struct printbuf *out, : "unknown"); prt_newline(out); - prt_printf(out, "Label:"); - prt_tab(out); - if (BCH_MEMBER_GROUP(&m)) { - unsigned idx = BCH_MEMBER_GROUP(&m) - 1; - - if (idx < disk_groups_nr(gi)) - prt_printf(out, "%s (%u)", - gi->entries[idx].label, idx); - else - prt_printf(out, "(bad disk labels section)"); - } else { - prt_printf(out, "(none)"); - } - prt_newline(out); - prt_printf(out, "Data allowed:"); prt_tab(out); if (BCH_MEMBER_DATA_ALLOWED(&m)) @@ -262,8 +279,7 @@ static int bch2_sb_members_v1_validate(struct bch_sb *sb, struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1); unsigned i; - if ((void *) members_v1_get_mut(mi, sb->nr_devices) > - vstruct_end(&mi->field)) { + if ((void *) members_v1_get_mut(mi, sb->nr_devices) > vstruct_end(&mi->field)) { prt_printf(err, "too many devices for section size"); return -BCH_ERR_invalid_sb_members; } @@ -286,10 +302,8 @@ static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb, struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); unsigned i; - for (i = 0; i < sb->nr_devices; i++) { - struct bch_member m = members_v1_get(mi, i); - member_to_text(out, m, gi, sb, i); - } + for (i = 0; i < sb->nr_devices; i++) + member_to_text(out, members_v1_get(mi, i), gi, sb, i); } const struct bch_sb_field_ops bch_sb_field_ops_members_v1 = { @@ -304,10 +318,8 @@ static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb, struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); unsigned i; - for (i = 0; i < sb->nr_devices; i++) { - struct bch_member m = members_v2_get(mi, i); - member_to_text(out, m, gi, sb, i); - } + for (i = 0; i < sb->nr_devices; i++) + member_to_text(out, members_v2_get(mi, i), gi, sb, i); } static int bch2_sb_members_v2_validate(struct bch_sb *sb, @@ -315,7 +327,7 @@ static int bch2_sb_members_v2_validate(struct bch_sb *sb, struct printbuf *err) { struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2); - size_t mi_bytes = (void *) members_v2_get_mut(mi, sb->nr_devices) - + size_t mi_bytes = (void *) __bch2_members_v2_get_mut(mi, sb->nr_devices) - (void *) mi; if (mi_bytes > vstruct_bytes(&mi->field)) { @@ -337,3 +349,72 @@ const struct bch_sb_field_ops bch_sb_field_ops_members_v2 = { .validate = bch2_sb_members_v2_validate, .to_text = bch2_sb_members_v2_to_text, }; + +void 
bch2_sb_members_from_cpu(struct bch_fs *c) +{ + struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); + struct bch_dev *ca; + unsigned i, e; + + rcu_read_lock(); + for_each_member_device_rcu(ca, c, i, NULL) { + struct bch_member *m = __bch2_members_v2_get_mut(mi, i); + + for (e = 0; e < BCH_MEMBER_ERROR_NR; e++) + m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e])); + } + rcu_read_unlock(); +} + +void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca) +{ + struct bch_fs *c = ca->fs; + struct bch_member m; + + mutex_lock(&ca->fs->sb_lock); + m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx); + mutex_unlock(&ca->fs->sb_lock); + + printbuf_tabstop_push(out, 12); + + prt_str(out, "IO errors since filesystem creation"); + prt_newline(out); + + printbuf_indent_add(out, 2); + for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) { + prt_printf(out, "%s:", bch2_member_error_strs[i]); + prt_tab(out); + prt_u64(out, atomic64_read(&ca->errors[i])); + prt_newline(out); + } + printbuf_indent_sub(out, 2); + + prt_str(out, "IO errors since "); + bch2_pr_time_units(out, (ktime_get_real_seconds() - le64_to_cpu(m.errors_reset_time)) * NSEC_PER_SEC); + prt_str(out, " ago"); + prt_newline(out); + + printbuf_indent_add(out, 2); + for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) { + prt_printf(out, "%s:", bch2_member_error_strs[i]); + prt_tab(out); + prt_u64(out, atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i])); + prt_newline(out); + } + printbuf_indent_sub(out, 2); +} + +void bch2_dev_errors_reset(struct bch_dev *ca) +{ + struct bch_fs *c = ca->fs; + struct bch_member *m; + + mutex_lock(&c->sb_lock); + m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++) + m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i])); + m->errors_reset_time = ktime_get_real_seconds(); + + bch2_write_super(c); + mutex_unlock(&c->sb_lock); +} diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index 430f3457bfd4..03613e3eb8e3 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -2,8 +2,16 @@ #ifndef _BCACHEFS_SB_MEMBERS_H #define _BCACHEFS_SB_MEMBERS_H -int bch2_members_v2_init(struct bch_fs *c); -int bch_members_cpy_v2_v1(struct bch_sb_handle *disk_sb); +extern char * const bch2_member_error_strs[]; + +static inline struct bch_member * +__bch2_members_v2_get_mut(struct bch_sb_field_members_v2 *mi, unsigned i) +{ + return (void *) mi->_members + (i * le16_to_cpu(mi->member_bytes)); +} + +int bch2_sb_members_v2_init(struct bch_fs *c); +int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb); struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i); struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i); @@ -179,4 +187,41 @@ static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c) extern const struct bch_sb_field_ops bch_sb_field_ops_members_v1; extern const struct bch_sb_field_ops bch_sb_field_ops_members_v2; +static inline bool bch2_member_exists(struct bch_member *m) +{ + return !bch2_is_zero(&m->uuid, sizeof(m->uuid)); +} + +static inline bool bch2_dev_exists(struct bch_sb *sb, unsigned dev) +{ + if (dev < sb->nr_devices) { + struct bch_member m = bch2_sb_member_get(sb, dev); + return bch2_member_exists(&m); + } + return false; +} + +static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi) +{ + return (struct bch_member_cpu) { + .nbuckets = le64_to_cpu(mi->nbuckets), + .first_bucket = 
le16_to_cpu(mi->first_bucket), + .bucket_size = le16_to_cpu(mi->bucket_size), + .group = BCH_MEMBER_GROUP(mi), + .state = BCH_MEMBER_STATE(mi), + .discard = BCH_MEMBER_DISCARD(mi), + .data_allowed = BCH_MEMBER_DATA_ALLOWED(mi), + .durability = BCH_MEMBER_DURABILITY(mi) + ? BCH_MEMBER_DURABILITY(mi) - 1 + : 1, + .freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi), + .valid = bch2_member_exists(mi), + }; +} + +void bch2_sb_members_from_cpu(struct bch_fs *); + +void bch2_dev_io_errors_to_text(struct printbuf *, struct bch_dev *); +void bch2_dev_errors_reset(struct bch_dev *); + #endif /* _BCACHEFS_SB_MEMBERS_H */ diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c index b684b9f00c1b..b775cf0fb7cb 100644 --- a/fs/bcachefs/six.c +++ b/fs/bcachefs/six.c @@ -11,6 +11,8 @@ #include <linux/sched/task.h> #include <linux/slab.h> +#include <trace/events/lock.h> + #include "six.h" #ifdef DEBUG @@ -462,11 +464,12 @@ static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type, smp_mb__after_atomic(); } + trace_contention_begin(lock, 0); + lock_contended(&lock->dep_map, ip); + if (six_optimistic_spin(lock, type)) goto out; - lock_contended(&lock->dep_map, ip); - wait->task = current; wait->lock_want = type; wait->lock_acquired = false; @@ -546,6 +549,7 @@ out: six_clear_bitmask(lock, SIX_LOCK_HELD_write); six_lock_wakeup(lock, atomic_read(&lock->state), SIX_LOCK_read); } + trace_contention_end(lock, 0); return ret; } diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 4982468bfe11..e9af77b384c7 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -30,17 +30,18 @@ void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c, le32_to_cpu(t.v->root_snapshot)); } -int bch2_snapshot_tree_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_snapshot_tree_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (bkey_gt(k.k->p, POS(0, U32_MAX)) || - bkey_lt(k.k->p, POS(0, 1))) { - prt_printf(err, "bad pos"); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; - return 0; + bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || + bkey_lt(k.k->p, POS(0, 1)), c, err, + snapshot_tree_pos_bad, + "bad pos"); +fsck_err: + return ret; } int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id, @@ -202,68 +203,60 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c, le32_to_cpu(s.v->skip[2])); } -int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_snapshot_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_snapshot s; u32 i, id; + int ret = 0; - if (bkey_gt(k.k->p, POS(0, U32_MAX)) || - bkey_lt(k.k->p, POS(0, 1))) { - prt_printf(err, "bad pos"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || + bkey_lt(k.k->p, POS(0, 1)), c, err, + snapshot_pos_bad, + "bad pos"); s = bkey_s_c_to_snapshot(k); id = le32_to_cpu(s.v->parent); - if (id && id <= k.k->p.offset) { - prt_printf(err, "bad parent node (%u <= %llu)", - id, k.k->p.offset); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(id && id <= k.k->p.offset, c, err, + snapshot_parent_bad, + "bad parent node (%u <= %llu)", + id, k.k->p.offset); - if (le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1])) { - prt_printf(err, "children not normalized"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]), c, err, + 
snapshot_children_not_normalized, + "children not normalized"); - if (s.v->children[0] && - s.v->children[0] == s.v->children[1]) { - prt_printf(err, "duplicate child nodes"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1], c, err, + snapshot_child_duplicate, + "duplicate child nodes"); for (i = 0; i < 2; i++) { id = le32_to_cpu(s.v->children[i]); - if (id >= k.k->p.offset) { - prt_printf(err, "bad child node (%u >= %llu)", - id, k.k->p.offset); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(id >= k.k->p.offset, c, err, + snapshot_child_bad, + "bad child node (%u >= %llu)", + id, k.k->p.offset); } if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) { - if (le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) || - le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2])) { - prt_printf(err, "skiplist not normalized"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) || + le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]), c, err, + snapshot_skiplist_not_normalized, + "skiplist not normalized"); for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) { id = le32_to_cpu(s.v->skip[i]); - if ((id && !s.v->parent) || - (id && id <= k.k->p.offset)) { - prt_printf(err, "bad skiplist node %u", id); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent), c, err, + snapshot_skiplist_bad, + "bad skiplist node %u", id); } } - - return 0; +fsck_err: + return ret; } static void __set_is_ancestor_bitmap(struct bch_fs *c, u32 id) @@ -325,8 +318,9 @@ int bch2_mark_snapshot(struct btree_trans *trans, __set_is_ancestor_bitmap(c, id); if (BCH_SNAPSHOT_DELETED(s.v)) { - set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); - c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_delete_dead_snapshots); + set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags); + if (c->curr_recovery_pass > BCH_RECOVERY_PASS_delete_dead_snapshots) + bch2_delete_dead_snapshots_async(c); } } else { memset(t, 0, sizeof(*t)); @@ -529,7 +523,7 @@ static int check_snapshot_tree(struct btree_trans *trans, if (fsck_err_on(ret || root_id != bch2_snapshot_root(c, root_id) || st.k->p.offset != le32_to_cpu(s.tree), - c, + c, snapshot_tree_to_missing_snapshot, "snapshot tree points to missing/incorrect snapshot:\n %s", (bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { ret = bch2_btree_delete_at(trans, iter, 0); @@ -541,17 +535,20 @@ static int check_snapshot_tree(struct btree_trans *trans, if (ret && !bch2_err_matches(ret, ENOENT)) goto err; - if (fsck_err_on(ret, c, + if (fsck_err_on(ret, + c, snapshot_tree_to_missing_subvol, "snapshot tree points to missing subvolume:\n %s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || fsck_err_on(!bch2_snapshot_is_ancestor_early(c, le32_to_cpu(subvol.snapshot), - root_id), c, + root_id), + c, snapshot_tree_to_wrong_subvol, "snapshot tree points to subvolume that does not point to snapshot in this tree:\n %s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || - fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol), c, + fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol), + c, snapshot_tree_to_snapshot_subvol, "snapshot tree points to snapshot subvolume:\n %s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { @@ -787,7 +784,9 @@ static int check_snapshot(struct btree_trans *trans, goto err; } } else { - if (fsck_err_on(s.subvol, c, "snapshot should not point to subvol:\n %s", + if 
(fsck_err_on(s.subvol, + c, snapshot_should_not_have_subvol, + "snapshot should not point to subvol:\n %s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); ret = PTR_ERR_OR_ZERO(u); @@ -803,7 +802,8 @@ static int check_snapshot(struct btree_trans *trans, if (ret < 0) goto err; - if (fsck_err_on(!ret, c, "snapshot points to missing/incorrect tree:\n %s", + if (fsck_err_on(!ret, c, snapshot_to_bad_snapshot_tree, + "snapshot points to missing/incorrect tree:\n %s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { ret = snapshot_tree_ptr_repair(trans, iter, k, &s); if (ret) @@ -815,7 +815,8 @@ static int check_snapshot(struct btree_trans *trans, if (le32_to_cpu(s.depth) != real_depth && (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists || - fsck_err(c, "snapshot with incorrect depth field, should be %u:\n %s", + fsck_err(c, snapshot_bad_depth, + "snapshot with incorrect depth field, should be %u:\n %s", real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) { u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); ret = PTR_ERR_OR_ZERO(u); @@ -832,7 +833,8 @@ static int check_snapshot(struct btree_trans *trans, if (!ret && (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists || - fsck_err(c, "snapshot with bad skiplist field:\n %s", + fsck_err(c, snapshot_bad_skiplist, + "snapshot with bad skiplist field:\n %s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) { u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); ret = PTR_ERR_OR_ZERO(u); @@ -1251,13 +1253,7 @@ static int move_key_to_correct_snapshot(struct btree_trans *trans, return 0; } -/* - * For a given snapshot, if it doesn't have a subvolume that points to it, and - * it doesn't have child snapshot nodes - it's now redundant and we can mark it - * as deleted. - */ -static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct btree_iter *iter, - struct bkey_s_c k) +static int bch2_snapshot_needs_delete(struct btree_trans *trans, struct bkey_s_c k) { struct bkey_s_c_snapshot snap; u32 children[2]; @@ -1278,10 +1274,21 @@ static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct btre bch2_snapshot_live(trans, children[1]); if (ret < 0) return ret; + return !ret; +} - if (!ret) - return bch2_snapshot_node_set_deleted(trans, k.k->p.offset); - return 0; +/* + * For a given snapshot, if it doesn't have a subvolume that points to it, and + * it doesn't have child snapshot nodes - it's now redundant and we can mark it + * as deleted. + */ +static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct bkey_s_c k) +{ + int ret = bch2_snapshot_needs_delete(trans, k); + + return ret <= 0 + ? ret + : bch2_snapshot_node_set_deleted(trans, k.k->p.offset); } static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n, @@ -1342,12 +1349,12 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, u32 id = le32_to_cpu(s->v.skip[j]); if (snapshot_list_has_id(deleted, id)) { - id = depth > 1 - ? bch2_snapshot_nth_parent_skip(c, + id = bch2_snapshot_nth_parent_skip(c, parent, - get_random_u32_below(depth - 1), - deleted) - : parent; + depth > 1 + ? 
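/* at depth 1 the parent is the only candidate, otherwise pick a random ancestor */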
get_random_u32_below(depth - 1) + : 0, + deleted); s->v.skip[j] = cpu_to_le32(id); } } @@ -1369,6 +1376,9 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) u32 *i, id; int ret = 0; + if (!test_and_clear_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags)) + return 0; + if (!test_bit(BCH_FS_STARTED, &c->flags)) { ret = bch2_fs_read_write_early(c); if (ret) { @@ -1386,7 +1396,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, NULL, NULL, 0, - bch2_delete_redundant_snapshot(trans, &iter, k)); + bch2_delete_redundant_snapshot(trans, k)); if (ret) { bch_err_msg(c, ret, "deleting redundant snapshots"); goto err; @@ -1427,6 +1437,15 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) if (!btree_type_has_snapshots(id)) continue; + /* + * deleted inodes btree is maintained by a trigger on the inodes + * btree - no work for us to do here, and it's not safe to scan + * it because we'll see out of date keys due to the btree write + * buffer: + */ + if (id == BTREE_ID_deleted_inodes) + continue; + ret = for_each_btree_key_commit(trans, iter, id, POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, @@ -1447,6 +1466,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) } } + bch2_trans_unlock(trans); down_write(&c->snapshot_create_lock); for_each_btree_key(trans, iter, BTREE_ID_snapshots, @@ -1491,8 +1511,6 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) goto err_create_lock; } } - - clear_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); err_create_lock: up_write(&c->snapshot_create_lock); err: @@ -1508,8 +1526,7 @@ void bch2_delete_dead_snapshots_work(struct work_struct *work) { struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work); - if (test_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags)) - bch2_delete_dead_snapshots(c); + bch2_delete_dead_snapshots(c); bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); } @@ -1520,20 +1537,6 @@ void bch2_delete_dead_snapshots_async(struct bch_fs *c) bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); } -int bch2_delete_dead_snapshots_hook(struct btree_trans *trans, - struct btree_trans_commit_hook *h) -{ - struct bch_fs *c = trans->c; - - set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); - - if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_delete_dead_snapshots) - return 0; - - bch2_delete_dead_snapshots_async(c); - return 0; -} - int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans, enum btree_id id, struct bpos pos) @@ -1664,6 +1667,26 @@ again: return ret ?: trans_was_restarted(trans, restart_count); } +static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k) +{ + struct bch_fs *c = trans->c; + struct bkey_s_c_snapshot snap; + int ret = 0; + + if (k.k->type != KEY_TYPE_snapshot) + return 0; + + snap = bkey_s_c_to_snapshot(k); + if (BCH_SNAPSHOT_DELETED(snap.v) || + bch2_snapshot_equiv(c, k.k->p.offset) != k.k->p.offset || + (ret = bch2_snapshot_needs_delete(trans, k)) > 0) { + set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags); + return 0; + } + + return ret; +} + int bch2_snapshots_read(struct bch_fs *c) { struct btree_iter iter; @@ -1674,7 +1697,8 @@ int bch2_snapshots_read(struct bch_fs *c) for_each_btree_key2(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?: - bch2_snapshot_set_equiv(trans, k)) ?: + bch2_snapshot_set_equiv(trans, k) ?: + bch2_check_snapshot_needs_deletion(trans, k)) ?: for_each_btree_key2(trans, iter, 
BTREE_ID_snapshots, POS_MIN, 0, k, (set_is_ancestor_bitmap(c, k.k->p.offset), 0))); diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index de215d9d1252..f09a22f44239 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -5,7 +5,7 @@ enum bkey_invalid_flags; void bch2_snapshot_tree_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_snapshot_tree_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_snapshot_tree_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); #define bch2_bkey_ops_snapshot_tree ((struct bkey_ops) { \ @@ -19,7 +19,7 @@ struct bkey_i_snapshot_tree *__bch2_snapshot_tree_create(struct btree_trans *); int bch2_snapshot_tree_lookup(struct btree_trans *, u32, struct bch_snapshot_tree *); void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_snapshot_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_snapshot_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s_c, unsigned); @@ -244,8 +244,6 @@ int bch2_check_snapshot_trees(struct bch_fs *); int bch2_check_snapshots(struct bch_fs *); int bch2_snapshot_node_set_deleted(struct btree_trans *, u32); -int bch2_delete_dead_snapshots_hook(struct btree_trans *, - struct btree_trans_commit_hook *); void bch2_delete_dead_snapshots_work(struct work_struct *); int __bch2_key_has_snapshot_overwrites(struct btree_trans *, enum btree_id, struct bpos); diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index caf2dd7dafff..fccd25aa3242 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -62,7 +62,8 @@ static int check_subvol(struct btree_trans *trans, if (ret) return ret; - if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset, c, + if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset, + c, subvol_not_master_and_not_snapshot, "subvolume %llu is not set as snapshot but is not master subvolume", k.k->p.offset)) { struct bkey_i_subvolume *s = @@ -97,16 +98,17 @@ int bch2_check_subvols(struct bch_fs *c) /* Subvolumes: */ -int bch2_subvolume_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (bkey_lt(k.k->p, SUBVOL_POS_MIN) || - bkey_gt(k.k->p, SUBVOL_POS_MAX)) { - prt_printf(err, "invalid pos"); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; - return 0; + bkey_fsck_err_on(bkey_lt(k.k->p, SUBVOL_POS_MIN) || + bkey_gt(k.k->p, SUBVOL_POS_MAX), c, err, + subvol_pos_bad, + "invalid pos"); +fsck_err: + return ret; } void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c, @@ -230,7 +232,6 @@ static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) { struct btree_iter iter; struct bkey_s_c_subvolume subvol; - struct btree_trans_commit_hook *h; u32 snapid; int ret = 0; @@ -246,22 +247,8 @@ static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) snapid = le32_to_cpu(subvol.v->snapshot); - ret = bch2_btree_delete_at(trans, &iter, 0); - if (ret) - goto err; - - ret = bch2_snapshot_node_set_deleted(trans, snapid); - if (ret) - goto err; - - h = bch2_trans_kmalloc(trans, sizeof(*h)); - ret = PTR_ERR_OR_ZERO(h); - if (ret) - goto err; - - h->fn = bch2_delete_dead_snapshots_hook; - bch2_trans_commit_hook(trans, h); -err: + ret = bch2_btree_delete_at(trans, &iter, 0) ?: 
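+		/* the commit hook is gone: dead snapshot cleanup is now kicked off from bch2_mark_snapshot() */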
+ bch2_snapshot_node_set_deleted(trans, snapid); bch2_trans_iter_exit(trans, &iter); return ret; } diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index bb14f92e8687..a1003d30ab0a 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -9,7 +9,7 @@ enum bkey_invalid_flags; int bch2_check_subvols(struct bch_fs *); -int bch2_subvolume_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_subvolume_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 332d41e1c0a3..f4cad903f4d6 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -13,6 +13,7 @@ #include "replicas.h" #include "quota.h" #include "sb-clean.h" +#include "sb-errors.h" #include "sb-members.h" #include "super-io.h" #include "super.h" @@ -720,7 +721,7 @@ retry: if (opt_defined(*opts, sb)) goto err; - printk(KERN_ERR "bcachefs (%s): error reading default superblock: %s", + printk(KERN_ERR "bcachefs (%s): error reading default superblock: %s\n", path, err.buf); printbuf_reset(&err); @@ -782,7 +783,7 @@ got_super: ret = bch2_sb_validate(sb, &err, READ); if (ret) { - printk(KERN_ERR "bcachefs (%s): error validating superblock: %s", + printk(KERN_ERR "bcachefs (%s): error validating superblock: %s\n", path, err.buf); goto err_no_print; } @@ -790,7 +791,7 @@ out: printbuf_exit(&err); return ret; err: - printk(KERN_ERR "bcachefs (%s): error reading superblock: %s", + printk(KERN_ERR "bcachefs (%s): error reading superblock: %s\n", path, err.buf); err_no_print: bch2_free_super(sb); @@ -805,7 +806,12 @@ static void write_super_endio(struct bio *bio) /* XXX: return errors directly */ - if (bch2_dev_io_err_on(bio->bi_status, ca, "superblock write error: %s", + if (bch2_dev_io_err_on(bio->bi_status, ca, + bio_data_dir(bio) + ? BCH_MEMBER_ERROR_write + : BCH_MEMBER_ERROR_read, + "superblock %s error: %s", + bio_data_dir(bio) ? 
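/* the same endio serves superblock read-back, so report the bio's actual direction */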
"write" : "read", bch2_blk_status_to_str(bio->bi_status))) ca->sb_write_error = 1; @@ -892,7 +898,9 @@ int bch2_write_super(struct bch_fs *c) SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN); bch2_sb_counters_from_cpu(c); - bch_members_cpy_v2_v1(&c->disk_sb); + bch2_sb_members_from_cpu(c); + bch2_sb_members_cpy_v2_v1(&c->disk_sb); + bch2_sb_errors_from_cpu(c); for_each_online_member(ca, c, i) bch2_sb_from_fs(c, ca); @@ -1175,7 +1183,7 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, prt_printf(out, "Created:"); prt_tab(out); if (sb->time_base_lo) - pr_time(out, div_u64(le64_to_cpu(sb->time_base_lo), NSEC_PER_SEC)); + bch2_prt_datetime(out, div_u64(le64_to_cpu(sb->time_base_lo), NSEC_PER_SEC)); else prt_printf(out, "(not set)"); prt_newline(out); diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index b0d8584f475f..f5abd102bff7 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ -23,6 +23,11 @@ u64 bch2_upgrade_recovery_passes(struct bch_fs *c, unsigned, unsigned); +static inline size_t bch2_sb_field_bytes(struct bch_sb_field *f) +{ + return le32_to_cpu(f->u64s) * sizeof(u64); +} + #define field_to_type(_f, _name) \ container_of_or_null(_f, struct bch_sb_field_##_name, field) @@ -78,41 +83,6 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat) __bch2_check_set_feature(c, feat); } -/* BCH_SB_FIELD_members_v1: */ - -static inline bool bch2_member_exists(struct bch_member *m) -{ - return !bch2_is_zero(&m->uuid, sizeof(m->uuid)); -} - -static inline bool bch2_dev_exists(struct bch_sb *sb, - unsigned dev) -{ - if (dev < sb->nr_devices) { - struct bch_member m = bch2_sb_member_get(sb, dev); - return bch2_member_exists(&m); - } - return false; -} - -static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi) -{ - return (struct bch_member_cpu) { - .nbuckets = le64_to_cpu(mi->nbuckets), - .first_bucket = le16_to_cpu(mi->first_bucket), - .bucket_size = le16_to_cpu(mi->bucket_size), - .group = BCH_MEMBER_GROUP(mi), - .state = BCH_MEMBER_STATE(mi), - .discard = BCH_MEMBER_DISCARD(mi), - .data_allowed = BCH_MEMBER_DATA_ALLOWED(mi), - .durability = BCH_MEMBER_DURABILITY(mi) - ? 
BCH_MEMBER_DURABILITY(mi) - 1 - : 1, - .freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi), - .valid = bch2_member_exists(mi), - }; -} - void bch2_sb_maybe_downgrade(struct bch_fs *); void bch2_sb_upgrade(struct bch_fs *, unsigned); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 0e85c22672be..24672bb31cbe 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -49,6 +49,7 @@ #include "recovery.h" #include "replicas.h" #include "sb-clean.h" +#include "sb-errors.h" #include "sb-members.h" #include "snapshot.h" #include "subvolume.h" @@ -400,7 +401,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) bch_info(c, "going read-write"); - ret = bch2_members_v2_init(c); + ret = bch2_sb_members_v2_init(c); if (ret) goto err; @@ -481,6 +482,7 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_time_stats_exit(&c->times[i]); bch2_free_pending_node_rewrites(c); + bch2_fs_sb_errors_exit(c); bch2_fs_counters_exit(c); bch2_fs_snapshots_exit(c); bch2_fs_quota_exit(c); @@ -713,6 +715,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) bch2_fs_quota_init(c); bch2_fs_ec_init_early(c); bch2_fs_move_init(c); + bch2_fs_sb_errors_init_early(c); INIT_LIST_HEAD(&c->list); @@ -729,8 +732,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) INIT_LIST_HEAD(&c->journal_iters); - INIT_LIST_HEAD(&c->fsck_errors); - mutex_init(&c->fsck_error_lock); + INIT_LIST_HEAD(&c->fsck_error_msgs); + mutex_init(&c->fsck_error_msgs_lock); seqcount_init(&c->gc_pos_lock); @@ -840,6 +843,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) } ret = bch2_fs_counters_init(c) ?: + bch2_fs_sb_errors_init(c) ?: bch2_io_clock_init(&c->io_clock[READ]) ?: bch2_io_clock_init(&c->io_clock[WRITE]) ?: bch2_fs_journal_init(&c->journal) ?: @@ -942,16 +946,13 @@ int bch2_fs_start(struct bch_fs *c) mutex_lock(&c->sb_lock); - ret = bch2_members_v2_init(c); + ret = bch2_sb_members_v2_init(c); if (ret) { mutex_unlock(&c->sb_lock); goto err; } for_each_online_member(ca, c, i) - bch2_sb_from_fs(c, ca); - - for_each_online_member(ca, c, i) bch2_members_v2_get_mut(c->disk_sb.sb, i)->last_mount = cpu_to_le64(now); mutex_unlock(&c->sb_lock); @@ -960,12 +961,6 @@ int bch2_fs_start(struct bch_fs *c) bch2_dev_allocator_add(c, ca); bch2_recalc_capacity(c); - for (i = 0; i < BCH_TRANSACTIONS_NR; i++) { - mutex_lock(&c->btree_transaction_stats[i].lock); - bch2_time_stats_init(&c->btree_transaction_stats[i].lock_hold_times); - mutex_unlock(&c->btree_transaction_stats[i].lock); - } - ret = BCH_SB_INITIALIZED(c->disk_sb.sb) ? 
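/* initialized superblock: run recovery; brand new filesystem: initialize */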
bch2_fs_recovery(c) : bch2_fs_initialize(c); @@ -1140,6 +1135,7 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, struct bch_member *member) { struct bch_dev *ca; + unsigned i; ca = kzalloc(sizeof(*ca), GFP_KERNEL); if (!ca) @@ -1157,6 +1153,10 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, bch2_time_stats_init(&ca->io_latency[WRITE]); ca->mi = bch2_mi_to_cpu(member); + + for (i = 0; i < ARRAY_SIZE(member->errors); i++) + atomic64_set(&ca->errors[i], le64_to_cpu(member->errors[i])); + ca->uuid = member->uuid; ca->nr_btree_reserve = DIV_ROUND_UP(BTREE_NODE_RESERVE, @@ -1591,7 +1591,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) dev_mi = bch2_sb_member_get(sb.sb, sb.sb->dev_idx); if (BCH_MEMBER_GROUP(&dev_mi)) { - bch2_disk_path_to_text(&label, sb.sb, BCH_MEMBER_GROUP(&dev_mi) - 1); + bch2_disk_path_to_text_sb(&label, sb.sb, BCH_MEMBER_GROUP(&dev_mi) - 1); if (label.allocation_failure) { ret = -ENOMEM; goto err; @@ -1631,16 +1631,6 @@ int bch2_dev_add(struct bch_fs *c, const char *path) goto err_unlock; } - mi = bch2_sb_field_get(ca->disk_sb.sb, members_v2); - - if (!bch2_sb_field_resize(&ca->disk_sb, members_v2, - le32_to_cpu(mi->field.u64s) + - sizeof(dev_mi) / sizeof(u64))) { - ret = -BCH_ERR_ENOSPC_sb_members; - bch_err_msg(c, ret, "setting up new superblock"); - goto err_unlock; - } - if (dynamic_fault("bcachefs:add:no_slot")) goto no_slot; @@ -1654,6 +1644,8 @@ no_slot: have_slot: nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices); + + mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); u64s = DIV_ROUND_UP(sizeof(struct bch_sb_field_members_v2) + le16_to_cpu(mi->member_bytes) * nr_devices, sizeof(u64)); @@ -1689,13 +1681,13 @@ have_slot: ret = bch2_trans_mark_dev_sb(c, ca); if (ret) { - bch_err_msg(c, ret, "marking new superblock"); + bch_err_msg(ca, ret, "marking new superblock"); goto err_late; } ret = bch2_fs_freespace_init(c); if (ret) { - bch_err_msg(c, ret, "initializing free space"); + bch_err_msg(ca, ret, "initializing free space"); goto err_late; } @@ -1763,19 +1755,26 @@ int bch2_dev_online(struct bch_fs *c, const char *path) if (ca->mi.state == BCH_MEMBER_STATE_rw) __bch2_dev_read_write(c, ca); - mutex_lock(&c->sb_lock); - struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + if (!ca->mi.freespace_initialized) { + ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets); + bch_err_msg(ca, ret, "initializing free space"); + if (ret) + goto err; + } - m->last_mount = - cpu_to_le64(ktime_get_real_seconds()); + if (!ca->journal.nr) { + ret = bch2_dev_journal_alloc(ca); + bch_err_msg(ca, ret, "allocating journal"); + if (ret) + goto err; + } + mutex_lock(&c->sb_lock); + bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = + cpu_to_le64(ktime_get_real_seconds()); bch2_write_super(c); mutex_unlock(&c->sb_lock); - ret = bch2_fs_freespace_init(c); - if (ret) - bch_err_msg(c, ret, "initializing free space"); - up_write(&c->state_lock); return 0; err: @@ -1886,9 +1885,9 @@ found: struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, struct bch_opts opts) { - struct bch_sb_handle *sb = NULL; + DARRAY(struct bch_sb_handle) sbs = { 0 }; struct bch_fs *c = NULL; - unsigned i, best_sb = 0; + struct bch_sb_handle *sb, *best = NULL; struct printbuf errbuf = PRINTBUF; int ret = 0; @@ -1900,49 +1899,46 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, goto err; } - sb = kcalloc(nr_devices, sizeof(*sb), GFP_KERNEL); - if (!sb) { - ret = -ENOMEM; + ret = 
darray_make_room(&sbs, nr_devices); + if (ret) goto err; - } - for (i = 0; i < nr_devices; i++) { - ret = bch2_read_super(devices[i], &opts, &sb[i]); + for (unsigned i = 0; i < nr_devices; i++) { + struct bch_sb_handle sb = { NULL }; + + ret = bch2_read_super(devices[i], &opts, &sb); if (ret) goto err; + BUG_ON(darray_push(&sbs, sb)); } - for (i = 1; i < nr_devices; i++) - if (le64_to_cpu(sb[i].sb->seq) > - le64_to_cpu(sb[best_sb].sb->seq)) - best_sb = i; - - i = 0; - while (i < nr_devices) { - if (i != best_sb && - !bch2_dev_exists(sb[best_sb].sb, sb[i].sb->dev_idx)) { - pr_info("%pg has been removed, skipping", sb[i].bdev); - bch2_free_super(&sb[i]); - array_remove_item(sb, nr_devices, i); + darray_for_each(sbs, sb) + if (!best || le64_to_cpu(sb->sb->seq) > le64_to_cpu(best->sb->seq)) + best = sb; + + darray_for_each_reverse(sbs, sb) { + if (sb != best && !bch2_dev_exists(best->sb, sb->sb->dev_idx)) { + pr_info("%pg has been removed, skipping", sb->bdev); + bch2_free_super(sb); + darray_remove_item(&sbs, sb); + best -= best > sb; continue; } - ret = bch2_dev_in_fs(sb[best_sb].sb, sb[i].sb); + ret = bch2_dev_in_fs(best->sb, sb->sb); if (ret) goto err_print; - i++; } - c = bch2_fs_alloc(sb[best_sb].sb, opts); - if (IS_ERR(c)) { - ret = PTR_ERR(c); + c = bch2_fs_alloc(best->sb, opts); + ret = PTR_ERR_OR_ZERO(c); + if (ret) goto err; - } down_write(&c->state_lock); - for (i = 0; i < nr_devices; i++) { - ret = bch2_dev_attach_bdev(c, &sb[i]); + darray_for_each(sbs, sb) { + ret = bch2_dev_attach_bdev(c, sb); if (ret) { up_write(&c->state_lock); goto err; @@ -1961,7 +1957,9 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, goto err; } out: - kfree(sb); + darray_for_each(sbs, sb) + bch2_free_super(sb); + darray_exit(&sbs); printbuf_exit(&errbuf); module_put(THIS_MODULE); return c; @@ -1971,9 +1969,6 @@ err_print: err: if (!IS_ERR_OR_NULL(c)) bch2_fs_stop(c); - if (sb) - for (i = 0; i < nr_devices; i++) - bch2_free_super(&sb[i]); c = ERR_PTR(ret); goto out; } diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h index 78d6138db62d..7dda4985b99f 100644 --- a/fs/bcachefs/super_types.h +++ b/fs/bcachefs/super_types.h @@ -37,16 +37,4 @@ struct bch_member_cpu { u8 valid; }; -struct bch_disk_group_cpu { - bool deleted; - u16 parent; - struct bch_devs_mask devs; -}; - -struct bch_disk_groups_cpu { - struct rcu_head rcu; - unsigned nr; - struct bch_disk_group_cpu entries[] __counted_by(nr); -}; - #endif /* _BCACHEFS_SUPER_TYPES_H */ diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 397116966a7c..ab743115f169 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -149,7 +149,9 @@ read_attribute(bucket_size); read_attribute(first_bucket); read_attribute(nbuckets); rw_attribute(durability); -read_attribute(iodone); +read_attribute(io_done); +read_attribute(io_errors); +write_attribute(io_errors_reset); read_attribute(io_latency_read); read_attribute(io_latency_write); @@ -212,7 +214,7 @@ read_attribute(copy_gc_wait); rw_attribute(rebalance_enabled); sysfs_pd_controller_attribute(rebalance); -read_attribute(rebalance_work); +read_attribute(rebalance_status); rw_attribute(promote_whole_extents); read_attribute(new_stripes); @@ -341,7 +343,7 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c) { - prt_printf(out, "%s: ", bch2_btree_ids[c->gc_gens_btree]); + prt_printf(out, "%s: ", bch2_btree_id_str(c->gc_gens_btree)); bch2_bpos_to_text(out, 
c->gc_gens_pos); prt_printf(out, "\n"); } @@ -386,8 +388,8 @@ SHOW(bch2_fs) if (attr == &sysfs_copy_gc_wait) bch2_copygc_wait_to_text(out, c); - if (attr == &sysfs_rebalance_work) - bch2_rebalance_work_to_text(out, c); + if (attr == &sysfs_rebalance_status) + bch2_rebalance_status_to_text(out, c); sysfs_print(promote_whole_extents, c->promote_whole_extents); @@ -646,7 +648,7 @@ struct attribute *bch2_fs_internal_files[] = { &sysfs_copy_gc_wait, &sysfs_rebalance_enabled, - &sysfs_rebalance_work, + &sysfs_rebalance_status, sysfs_pd_controller_files(rebalance), &sysfs_moving_ctxts, @@ -707,10 +709,8 @@ STORE(bch2_fs_opts_dir) bch2_opt_set_by_id(&c->opts, id, v); if ((id == Opt_background_target || - id == Opt_background_compression) && v) { - bch2_rebalance_add_work(c, S64_MAX); - rebalance_wakeup(c); - } + id == Opt_background_compression) && v) + bch2_set_rebalance_needs_scan(c, 0); ret = size; err: @@ -882,7 +882,7 @@ static const char * const bch2_rw[] = { NULL }; -static void dev_iodone_to_text(struct printbuf *out, struct bch_dev *ca) +static void dev_io_done_to_text(struct printbuf *out, struct bch_dev *ca) { int rw, i; @@ -910,13 +910,8 @@ SHOW(bch2_dev) sysfs_print(discard, ca->mi.discard); if (attr == &sysfs_label) { - if (ca->mi.group) { - mutex_lock(&c->sb_lock); - bch2_disk_path_to_text(out, c->disk_sb.sb, - ca->mi.group - 1); - mutex_unlock(&c->sb_lock); - } - + if (ca->mi.group) + bch2_disk_path_to_text(out, c, ca->mi.group - 1); prt_char(out, '\n'); } @@ -930,8 +925,11 @@ SHOW(bch2_dev) prt_char(out, '\n'); } - if (attr == &sysfs_iodone) - dev_iodone_to_text(out, ca); + if (attr == &sysfs_io_done) + dev_io_done_to_text(out, ca); + + if (attr == &sysfs_io_errors) + bch2_dev_io_errors_to_text(out, ca); sysfs_print(io_latency_read, atomic64_read(&ca->cur_latency[READ])); sysfs_print(io_latency_write, atomic64_read(&ca->cur_latency[WRITE])); @@ -998,6 +996,9 @@ STORE(bch2_dev) return ret; } + if (attr == &sysfs_io_errors_reset) + bch2_dev_errors_reset(ca); + return size; } SYSFS_OPS(bch2_dev); @@ -1015,7 +1016,9 @@ struct attribute *bch2_dev_files[] = { &sysfs_label, &sysfs_has_data, - &sysfs_iodone, + &sysfs_io_done, + &sysfs_io_errors, + &sysfs_io_errors_reset, &sysfs_io_latency_read, &sysfs_io_latency_write, diff --git a/fs/bcachefs/trace.c b/fs/bcachefs/trace.c index 33efa6005c6f..dc48b52b01b4 100644 --- a/fs/bcachefs/trace.c +++ b/fs/bcachefs/trace.c @@ -7,6 +7,7 @@ #include "btree_locking.h" #include "btree_update_interior.h" #include "keylist.h" +#include "move_types.h" #include "opts.h" #include "six.h" diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 19264492151b..893304a1f06e 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -68,7 +68,7 @@ DECLARE_EVENT_CLASS(btree_node, TP_printk("%d,%d %u %s %llu:%llu:%u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->level, - bch2_btree_ids[__entry->btree_id], + bch2_btree_id_str(__entry->btree_id), __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot) ); @@ -461,7 +461,7 @@ TRACE_EVENT(btree_path_relock_fail, TP_printk("%s %pS btree %s pos %llu:%llu:%u level %u node %s held %u:%u lock count %u:%u iter seq %u lock seq %u", __entry->trans_fn, (void *) __entry->caller_ip, - bch2_btree_ids[__entry->btree_id], + bch2_btree_id_str(__entry->btree_id), __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot, @@ -522,7 +522,7 @@ TRACE_EVENT(btree_path_upgrade_fail, TP_printk("%s %pS btree %s pos %llu:%llu:%u level %u locked %u held %u:%u lock count %u:%u iter seq %u lock seq %u", 
__entry->trans_fn, (void *) __entry->caller_ip, - bch2_btree_ids[__entry->btree_id], + bch2_btree_id_str(__entry->btree_id), __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot, @@ -767,25 +767,36 @@ DEFINE_EVENT(bkey, move_extent_alloc_mem_fail, ); TRACE_EVENT(move_data, - TP_PROTO(struct bch_fs *c, u64 sectors_moved, - u64 keys_moved), - TP_ARGS(c, sectors_moved, keys_moved), + TP_PROTO(struct bch_fs *c, + struct bch_move_stats *stats), + TP_ARGS(c, stats), TP_STRUCT__entry( - __field(dev_t, dev ) - __field(u64, sectors_moved ) + __field(dev_t, dev ) __field(u64, keys_moved ) + __field(u64, keys_raced ) + __field(u64, sectors_seen ) + __field(u64, sectors_moved ) + __field(u64, sectors_raced ) ), TP_fast_assign( - __entry->dev = c->dev; - __entry->sectors_moved = sectors_moved; - __entry->keys_moved = keys_moved; + __entry->dev = c->dev; + __entry->keys_moved = atomic64_read(&stats->keys_moved); + __entry->keys_raced = atomic64_read(&stats->keys_raced); + __entry->sectors_seen = atomic64_read(&stats->sectors_seen); + __entry->sectors_moved = atomic64_read(&stats->sectors_moved); + __entry->sectors_raced = atomic64_read(&stats->sectors_raced); ), - TP_printk("%d,%d sectors_moved %llu keys_moved %llu", + TP_printk("%d,%d keys moved %llu raced %llu " + "sectors seen %llu moved %llu raced %llu", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->sectors_moved, __entry->keys_moved) + __entry->keys_moved, + __entry->keys_raced, + __entry->sectors_seen, + __entry->sectors_moved, + __entry->sectors_raced) ); TRACE_EVENT(evacuate_bucket, @@ -1012,7 +1023,7 @@ DECLARE_EVENT_CLASS(transaction_restart_iter, TP_printk("%s %pS btree %s pos %llu:%llu:%u", __entry->trans_fn, (void *) __entry->caller_ip, - bch2_btree_ids[__entry->btree_id], + bch2_btree_id_str(__entry->btree_id), __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot) @@ -1032,13 +1043,16 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_split, TP_ARGS(trans, caller_ip, path) ); +struct get_locks_fail; + TRACE_EVENT(trans_restart_upgrade, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, struct btree_path *path, unsigned old_locks_want, - unsigned new_locks_want), - TP_ARGS(trans, caller_ip, path, old_locks_want, new_locks_want), + unsigned new_locks_want, + struct get_locks_fail *f), + TP_ARGS(trans, caller_ip, path, old_locks_want, new_locks_want, f), TP_STRUCT__entry( __array(char, trans_fn, 32 ) @@ -1046,6 +1060,11 @@ TRACE_EVENT(trans_restart_upgrade, __field(u8, btree_id ) __field(u8, old_locks_want ) __field(u8, new_locks_want ) + __field(u8, level ) + __field(u32, path_seq ) + __field(u32, node_seq ) + __field(u32, path_alloc_seq ) + __field(u32, downgrade_seq) TRACE_BPOS_entries(pos) ), @@ -1055,18 +1074,28 @@ TRACE_EVENT(trans_restart_upgrade, __entry->btree_id = path->btree_id; __entry->old_locks_want = old_locks_want; __entry->new_locks_want = new_locks_want; + __entry->level = f->l; + __entry->path_seq = path->l[f->l].lock_seq; + __entry->node_seq = IS_ERR_OR_NULL(f->b) ? 
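/* f->b may be NULL or an ERR_PTR when no node was found at the failed level */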
0 : f->b->c.lock.seq; + __entry->path_alloc_seq = path->alloc_seq; + __entry->downgrade_seq = path->downgrade_seq; TRACE_BPOS_assign(pos, path->pos) ), - TP_printk("%s %pS btree %s pos %llu:%llu:%u locks_want %u -> %u", + TP_printk("%s %pS btree %s pos %llu:%llu:%u locks_want %u -> %u level %u path seq %u node seq %u alloc_seq %u downgrade_seq %u", __entry->trans_fn, (void *) __entry->caller_ip, - bch2_btree_ids[__entry->btree_id], + bch2_btree_id_str(__entry->btree_id), __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot, __entry->old_locks_want, - __entry->new_locks_want) + __entry->new_locks_want, + __entry->level, + __entry->path_seq, + __entry->node_seq, + __entry->path_alloc_seq, + __entry->downgrade_seq) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_relock, @@ -1219,7 +1248,7 @@ TRACE_EVENT(trans_restart_key_cache_key_realloced, TP_printk("%s %pS btree %s pos %llu:%llu:%u old_u64s %u new_u64s %u", __entry->trans_fn, (void *) __entry->caller_ip, - bch2_btree_ids[__entry->btree_id], + bch2_btree_id_str(__entry->btree_id), __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot, @@ -1227,6 +1256,27 @@ TRACE_EVENT(trans_restart_key_cache_key_realloced, __entry->new_u64s) ); +TRACE_EVENT(path_downgrade, + TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip, + struct btree_path *path), + TP_ARGS(trans, caller_ip, path), + + TP_STRUCT__entry( + __array(char, trans_fn, 32 ) + __field(unsigned long, caller_ip ) + ), + + TP_fast_assign( + strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __entry->caller_ip = caller_ip; + ), + + TP_printk("%s %pS", + __entry->trans_fn, + (void *) __entry->caller_ip) +); + DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 08bac0ba8d0b..84b142fcc3df 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -467,6 +467,24 @@ static void bch2_pr_time_units_aligned(struct printbuf *out, u64 ns) prt_printf(out, "%s", u->name); } +#ifndef __KERNEL__ +#include <time.h> +void bch2_prt_datetime(struct printbuf *out, time64_t sec) +{ + time_t t = sec; + char buf[64]; + ctime_r(&t, buf); + prt_str(out, buf); +} +#else +void bch2_prt_datetime(struct printbuf *out, time64_t sec) +{ + char buf[64]; + snprintf(buf, sizeof(buf), "%ptT", &sec); + prt_str(out, buf); +} +#endif + #define TABSTOP_SIZE 12 static inline void pr_name_and_units(struct printbuf *out, const char *name, u64 ns) diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 849a37ae497c..2984b57b2958 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -245,26 +245,7 @@ do { \ #define prt_bitflags(...)
bch2_prt_bitflags(__VA_ARGS__) void bch2_pr_time_units(struct printbuf *, u64); - -#ifdef __KERNEL__ -static inline void pr_time(struct printbuf *out, u64 time) -{ - prt_printf(out, "%llu", time); -} -#else -#include <time.h> -static inline void pr_time(struct printbuf *out, u64 _time) -{ - char time_str[64]; - time_t time = _time; - struct tm *tm = localtime(&time); - size_t err = strftime(time_str, sizeof(time_str), "%c", tm); - if (!err) - prt_printf(out, "(formatting error)"); - else - prt_printf(out, "%s", time_str); -} -#endif +void bch2_prt_datetime(struct printbuf *, time64_t); #ifdef __KERNEL__ static inline void uuid_unparse_lower(u8 *uuid, char *out) diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index b069b1a62e25..a39ff0c296ec 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -70,46 +70,38 @@ const struct bch_hash_desc bch2_xattr_hash_desc = { .cmp_bkey = xattr_cmp_bkey, }; -int bch2_xattr_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_xattr_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - const struct xattr_handler *handler; struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); + unsigned val_u64s = xattr_val_u64s(xattr.v->x_name_len, + le16_to_cpu(xattr.v->x_val_len)); + int ret = 0; - if (bkey_val_u64s(k.k) < - xattr_val_u64s(xattr.v->x_name_len, - le16_to_cpu(xattr.v->x_val_len))) { - prt_printf(err, "value too small (%zu < %u)", - bkey_val_u64s(k.k), - xattr_val_u64s(xattr.v->x_name_len, - le16_to_cpu(xattr.v->x_val_len))); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_val_u64s(k.k) < val_u64s, c, err, + xattr_val_size_too_small, + "value too small (%zu < %u)", + bkey_val_u64s(k.k), val_u64s); /* XXX why +4 ? */ - if (bkey_val_u64s(k.k) > - xattr_val_u64s(xattr.v->x_name_len, - le16_to_cpu(xattr.v->x_val_len) + 4)) { - prt_printf(err, "value too big (%zu > %u)", - bkey_val_u64s(k.k), - xattr_val_u64s(xattr.v->x_name_len, - le16_to_cpu(xattr.v->x_val_len) + 4)); - return -BCH_ERR_invalid_bkey; - } - - handler = bch2_xattr_type_to_handler(xattr.v->x_type); - if (!handler) { - prt_printf(err, "invalid type (%u)", xattr.v->x_type); - return -BCH_ERR_invalid_bkey; - } - - if (memchr(xattr.v->x_name, '\0', xattr.v->x_name_len)) { - prt_printf(err, "xattr name has invalid characters"); - return -BCH_ERR_invalid_bkey; - } - - return 0; + val_u64s = xattr_val_u64s(xattr.v->x_name_len, + le16_to_cpu(xattr.v->x_val_len) + 4); + + bkey_fsck_err_on(bkey_val_u64s(k.k) > val_u64s, c, err, + xattr_val_size_too_big, + "value too big (%zu > %u)", + bkey_val_u64s(k.k), val_u64s); + + bkey_fsck_err_on(!bch2_xattr_type_to_handler(xattr.v->x_type), c, err, + xattr_invalid_type, + "invalid type (%u)", xattr.v->x_type); + + bkey_fsck_err_on(memchr(xattr.v->x_name, '\0', xattr.v->x_name_len), c, err, + xattr_name_invalid_chars, + "xattr name has invalid characters"); +fsck_err: + return ret; } void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c, @@ -590,7 +582,7 @@ err: if (value && (opt_id == Opt_background_compression || opt_id == Opt_background_target)) - bch2_rebalance_add_work(c, inode->v.i_blocks); + bch2_set_rebalance_needs_scan(c, inode->ei_inode.bi_inum); return bch2_err_class(ret); } diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h index f5a52e3a6016..1337f31a5c49 100644 --- a/fs/bcachefs/xattr.h +++ b/fs/bcachefs/xattr.h @@ -6,7 +6,7 @@ extern const struct bch_hash_desc bch2_xattr_hash_desc; -int bch2_xattr_invalid(const struct bch_fs *, struct bkey_s_c, +int 
bch2_xattr_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 9acdec56f626..a93d76df8ed8 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -96,6 +96,7 @@ static const struct address_space_operations befs_symlink_aops = { }; static const struct export_operations befs_export_operations = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = befs_fh_to_dentry, .fh_to_parent = befs_fh_to_parent, .get_parent = befs_get_parent, diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index c53a1d220622..1564eacc253d 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -15,6 +15,7 @@ #include <linux/slab.h> #include "super.h" +#include "mds_client.h" static inline void ceph_set_cached_acl(struct inode *inode, int type, struct posix_acl *acl) @@ -31,6 +32,7 @@ static inline void ceph_set_cached_acl(struct inode *inode, struct posix_acl *ceph_get_acl(struct inode *inode, int type, bool rcu) { + struct ceph_client *cl = ceph_inode_to_client(inode); int size; unsigned int retry_cnt = 0; const char *name; @@ -72,8 +74,8 @@ retry: } else if (size == -ENODATA || size == 0) { acl = NULL; } else { - pr_err_ratelimited("get acl %llx.%llx failed, err=%d\n", - ceph_vinop(inode), size); + pr_err_ratelimited_client(cl, "%llx.%llx failed, err=%d\n", + ceph_vinop(inode), size); acl = ERR_PTR(-EIO); } @@ -105,7 +107,7 @@ int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, case ACL_TYPE_ACCESS: name = XATTR_NAME_POSIX_ACL_ACCESS; if (acl) { - ret = posix_acl_update_mode(&nop_mnt_idmap, inode, + ret = posix_acl_update_mode(idmap, inode, &new_mode, &acl); if (ret) goto out; @@ -140,7 +142,7 @@ int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, newattrs.ia_ctime = current_time(inode); newattrs.ia_mode = new_mode; newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - ret = __ceph_setattr(inode, &newattrs, NULL); + ret = __ceph_setattr(idmap, inode, &newattrs, NULL); if (ret) goto out_free; } @@ -151,7 +153,7 @@ int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, newattrs.ia_ctime = old_ctime; newattrs.ia_mode = old_mode; newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - __ceph_setattr(inode, &newattrs, NULL); + __ceph_setattr(idmap, inode, &newattrs, NULL); } goto out_free; } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 936b9e0b351d..85be3bf18cdf 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -79,18 +79,18 @@ static inline struct ceph_snap_context *page_snap_context(struct page *page) */ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio) { - struct inode *inode; + struct inode *inode = mapping->host; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci; struct ceph_snap_context *snapc; if (folio_test_dirty(folio)) { - dout("%p dirty_folio %p idx %lu -- already dirty\n", - mapping->host, folio, folio->index); + doutc(cl, "%llx.%llx %p idx %lu -- already dirty\n", + ceph_vinop(inode), folio, folio->index); VM_BUG_ON_FOLIO(!folio_test_private(folio), folio); return false; } - inode = mapping->host; ci = ceph_inode(inode); /* dirty the head */ @@ -111,12 +111,12 @@ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio) if (ci->i_wrbuffer_ref == 0) ihold(inode); ++ci->i_wrbuffer_ref; - dout("%p dirty_folio %p idx %lu head %d/%d -> %d/%d " - "snapc %p seq %lld (%d snaps)\n", - mapping->host, folio, folio->index, - 
ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1, - ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head, - snapc, snapc->seq, snapc->num_snaps); + doutc(cl, "%llx.%llx %p idx %lu head %d/%d -> %d/%d " + "snapc %p seq %lld (%d snaps)\n", + ceph_vinop(inode), folio, folio->index, + ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1, + ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head, + snapc, snapc->seq, snapc->num_snaps); spin_unlock(&ci->i_ceph_lock); /* @@ -137,23 +137,22 @@ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio) static void ceph_invalidate_folio(struct folio *folio, size_t offset, size_t length) { - struct inode *inode; - struct ceph_inode_info *ci; + struct inode *inode = folio->mapping->host; + struct ceph_client *cl = ceph_inode_to_client(inode); + struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_snap_context *snapc; - inode = folio->mapping->host; - ci = ceph_inode(inode); if (offset != 0 || length != folio_size(folio)) { - dout("%p invalidate_folio idx %lu partial dirty page %zu~%zu\n", - inode, folio->index, offset, length); + doutc(cl, "%llx.%llx idx %lu partial dirty page %zu~%zu\n", + ceph_vinop(inode), folio->index, offset, length); return; } WARN_ON(!folio_test_locked(folio)); if (folio_test_private(folio)) { - dout("%p invalidate_folio idx %lu full dirty page\n", - inode, folio->index); + doutc(cl, "%llx.%llx idx %lu full dirty page\n", + ceph_vinop(inode), folio->index); snapc = folio_detach_private(folio); ceph_put_wrbuffer_cap_refs(ci, 1, snapc); @@ -166,10 +165,10 @@ static void ceph_invalidate_folio(struct folio *folio, size_t offset, static bool ceph_release_folio(struct folio *folio, gfp_t gfp) { struct inode *inode = folio->mapping->host; + struct ceph_client *cl = ceph_inode_to_client(inode); - dout("%llx:%llx release_folio idx %lu (%sdirty)\n", - ceph_vinop(inode), - folio->index, folio_test_dirty(folio) ? "" : "not "); + doutc(cl, "%llx.%llx idx %lu (%sdirty)\n", ceph_vinop(inode), + folio->index, folio_test_dirty(folio) ? 
"" : "not "); if (folio_test_private(folio)) return false; @@ -229,7 +228,7 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq) static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq) { struct inode *inode = subreq->rreq->inode; - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); u64 objno, objoff; u32 xlen; @@ -244,7 +243,8 @@ static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq) static void finish_netfs_read(struct ceph_osd_request *req) { struct inode *inode = req->r_inode; - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0); struct netfs_io_subrequest *subreq = req->r_priv; struct ceph_osd_req_op *op = &req->r_ops[0]; @@ -254,8 +254,8 @@ static void finish_netfs_read(struct ceph_osd_request *req) ceph_update_read_metrics(&fsc->mdsc->metric, req->r_start_latency, req->r_end_latency, osd_data->length, err); - dout("%s: result %d subreq->len=%zu i_size=%lld\n", __func__, req->r_result, - subreq->len, i_size_read(req->r_inode)); + doutc(cl, "result %d subreq->len=%zu i_size=%lld\n", req->r_result, + subreq->len, i_size_read(req->r_inode)); /* no object means success but no data */ if (err == -ENOENT) @@ -348,7 +348,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) struct netfs_io_request *rreq = subreq->rreq; struct inode *inode = rreq->inode; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_osd_request *req = NULL; struct ceph_vino vino = ceph_vino(inode); struct iov_iter iter; @@ -383,7 +384,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) goto out; } - dout("%s: pos=%llu orig_len=%zu len=%llu\n", __func__, subreq->start, subreq->len, len); + doutc(cl, "%llx.%llx pos=%llu orig_len=%zu len=%llu\n", + ceph_vinop(inode), subreq->start, subreq->len, len); iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len); @@ -400,8 +402,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off); if (err < 0) { - dout("%s: iov_ter_get_pages_alloc returned %d\n", - __func__, err); + doutc(cl, "%llx.%llx failed to allocate pages, %d\n", + ceph_vinop(inode), err); goto out; } @@ -429,12 +431,13 @@ out: ceph_osdc_put_request(req); if (err) netfs_subreq_terminated(subreq, err, false); - dout("%s: result %d\n", __func__, err); + doutc(cl, "%llx.%llx result %d\n", ceph_vinop(inode), err); } static int ceph_init_request(struct netfs_io_request *rreq, struct file *file) { struct inode *inode = rreq->inode; + struct ceph_client *cl = ceph_inode_to_client(inode); int got = 0, want = CEPH_CAP_FILE_CACHE; struct ceph_netfs_request_data *priv; int ret = 0; @@ -466,12 +469,12 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file) */ ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want, true, &got); if (ret < 0) { - dout("start_read %p, error getting cap\n", inode); + doutc(cl, "%llx.%llx, error getting cap\n", ceph_vinop(inode)); goto out; } if (!(got & want)) { - dout("start_read %p, no cache cap\n", inode); + doutc(cl, 
"%llx.%llx, no cache cap\n", ceph_vinop(inode)); ret = -EACCES; goto out; } @@ -563,13 +566,14 @@ get_oldest_context(struct inode *inode, struct ceph_writeback_ctl *ctl, struct ceph_snap_context *page_snapc) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_snap_context *snapc = NULL; struct ceph_cap_snap *capsnap = NULL; spin_lock(&ci->i_ceph_lock); list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { - dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap, - capsnap->context, capsnap->dirty_pages); + doutc(cl, " capsnap %p snapc %p has %d dirty pages\n", + capsnap, capsnap->context, capsnap->dirty_pages); if (!capsnap->dirty_pages) continue; @@ -601,8 +605,8 @@ get_oldest_context(struct inode *inode, struct ceph_writeback_ctl *ctl, } if (!snapc && ci->i_wrbuffer_ref_head) { snapc = ceph_get_snap_context(ci->i_head_snapc); - dout(" head snapc %p has %d dirty pages\n", - snapc, ci->i_wrbuffer_ref_head); + doutc(cl, " head snapc %p has %d dirty pages\n", snapc, + ci->i_wrbuffer_ref_head); if (ctl) { ctl->i_size = i_size_read(inode); ctl->truncate_size = ci->i_truncate_size; @@ -658,7 +662,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) struct folio *folio = page_folio(page); struct inode *inode = page->mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_snap_context *snapc, *oldest; loff_t page_off = page_offset(page); int err; @@ -670,7 +675,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) bool caching = ceph_is_cache_enabled(inode); struct page *bounce_page = NULL; - dout("writepage %p idx %lu\n", page, page->index); + doutc(cl, "%llx.%llx page %p idx %lu\n", ceph_vinop(inode), page, + page->index); if (ceph_inode_is_shutdown(inode)) return -EIO; @@ -678,13 +684,14 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) /* verify this is a writeable snap context */ snapc = page_snap_context(page); if (!snapc) { - dout("writepage %p page %p not dirty?\n", inode, page); + doutc(cl, "%llx.%llx page %p not dirty?\n", ceph_vinop(inode), + page); return 0; } oldest = get_oldest_context(inode, &ceph_wbc, snapc); if (snapc->seq > oldest->seq) { - dout("writepage %p page %p snapc %p not writeable - noop\n", - inode, page, snapc); + doutc(cl, "%llx.%llx page %p snapc %p not writeable - noop\n", + ceph_vinop(inode), page, snapc); /* we should only noop if called by kswapd */ WARN_ON(!(current->flags & PF_MEMALLOC)); ceph_put_snap_context(oldest); @@ -695,8 +702,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) /* is this a partial page at end of file? */ if (page_off >= ceph_wbc.i_size) { - dout("folio at %lu beyond eof %llu\n", folio->index, - ceph_wbc.i_size); + doutc(cl, "%llx.%llx folio at %lu beyond eof %llu\n", + ceph_vinop(inode), folio->index, ceph_wbc.i_size); folio_invalidate(folio, 0, folio_size(folio)); return 0; } @@ -705,8 +712,9 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) len = ceph_wbc.i_size - page_off; wlen = IS_ENCRYPTED(inode) ? 
round_up(len, CEPH_FSCRYPT_BLOCK_SIZE) : len; - dout("writepage %p page %p index %lu on %llu~%llu snapc %p seq %lld\n", - inode, page, page->index, page_off, wlen, snapc, snapc->seq); + doutc(cl, "%llx.%llx page %p index %lu on %llu~%llu snapc %p seq %lld\n", + ceph_vinop(inode), page, page->index, page_off, wlen, snapc, + snapc->seq); if (atomic_long_inc_return(&fsc->writeback_count) > CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb)) @@ -747,8 +755,9 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) osd_req_op_extent_osd_data_pages(req, 0, bounce_page ? &bounce_page : &page, wlen, 0, false, false); - dout("writepage %llu~%llu (%llu bytes, %sencrypted)\n", - page_off, len, wlen, IS_ENCRYPTED(inode) ? "" : "not "); + doutc(cl, "%llx.%llx %llu~%llu (%llu bytes, %sencrypted)\n", + ceph_vinop(inode), page_off, len, wlen, + IS_ENCRYPTED(inode) ? "" : "not "); req->r_mtime = inode_get_mtime(inode); ceph_osdc_start_request(osdc, req); @@ -767,19 +776,21 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) wbc = &tmp_wbc; if (err == -ERESTARTSYS) { /* killed by SIGKILL */ - dout("writepage interrupted page %p\n", page); + doutc(cl, "%llx.%llx interrupted page %p\n", + ceph_vinop(inode), page); redirty_page_for_writepage(wbc, page); end_page_writeback(page); return err; } if (err == -EBLOCKLISTED) fsc->blocklisted = true; - dout("writepage setting page/mapping error %d %p\n", - err, page); + doutc(cl, "%llx.%llx setting page/mapping error %d %p\n", + ceph_vinop(inode), err, page); mapping_set_error(&inode->i_data, err); wbc->pages_skipped++; } else { - dout("writepage cleaned page %p\n", page); + doutc(cl, "%llx.%llx cleaned page %p\n", + ceph_vinop(inode), page); err = 0; /* vfs expects us to return 0 */ } oldest = detach_page_private(page); @@ -803,7 +814,7 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc) ihold(inode); if (wbc->sync_mode == WB_SYNC_NONE && - ceph_inode_to_client(inode)->write_congested) + ceph_inode_to_fs_client(inode)->write_congested) return AOP_WRITEPAGE_ACTIVATE; wait_on_page_fscache(page); @@ -829,6 +840,7 @@ static void writepages_finish(struct ceph_osd_request *req) { struct inode *inode = req->r_inode; struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_osd_data *osd_data; struct page *page; int num_pages, total_pages = 0; @@ -836,11 +848,11 @@ static void writepages_finish(struct ceph_osd_request *req) int rc = req->r_result; struct ceph_snap_context *snapc = req->r_snapc; struct address_space *mapping = inode->i_mapping; - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); unsigned int len = 0; bool remove_page; - dout("writepages_finish %p rc %d\n", inode, rc); + doutc(cl, "%llx.%llx rc %d\n", ceph_vinop(inode), rc); if (rc < 0) { mapping_set_error(mapping, rc); ceph_set_error_write(ci); @@ -862,8 +874,10 @@ static void writepages_finish(struct ceph_osd_request *req) /* clean all pages */ for (i = 0; i < req->r_num_ops; i++) { if (req->r_ops[i].op != CEPH_OSD_OP_WRITE) { - pr_warn("%s incorrect op %d req %p index %d tid %llu\n", - __func__, req->r_ops[i].op, req, i, req->r_tid); + pr_warn_client(cl, + "%llx.%llx incorrect op %d req %p index %d tid %llu\n", + ceph_vinop(inode), req->r_ops[i].op, req, i, + req->r_tid); break; } @@ -890,7 +904,7 @@ static void writepages_finish(struct ceph_osd_request *req) 
ceph_put_snap_context(detach_page_private(page)); end_page_writeback(page); - dout("unlocking %p\n", page); + doutc(cl, "unlocking %p\n", page); if (remove_page) generic_error_remove_page(inode->i_mapping, @@ -898,8 +912,9 @@ static void writepages_finish(struct ceph_osd_request *req) unlock_page(page); } - dout("writepages_finish %p wrote %llu bytes cleaned %d pages\n", - inode, osd_data->length, rc >= 0 ? num_pages : 0); + doutc(cl, "%llx.%llx wrote %llu bytes cleaned %d pages\n", + ceph_vinop(inode), osd_data->length, + rc >= 0 ? num_pages : 0); release_pages(osd_data->pages, num_pages); } @@ -926,7 +941,8 @@ static int ceph_writepages_start(struct address_space *mapping, { struct inode *inode = mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_vino vino = ceph_vino(inode); pgoff_t index, start_index, end = -1; struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc; @@ -944,15 +960,15 @@ static int ceph_writepages_start(struct address_space *mapping, fsc->write_congested) return 0; - dout("writepages_start %p (mode=%s)\n", inode, - wbc->sync_mode == WB_SYNC_NONE ? "NONE" : - (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); + doutc(cl, "%llx.%llx (mode=%s)\n", ceph_vinop(inode), + wbc->sync_mode == WB_SYNC_NONE ? "NONE" : + (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); if (ceph_inode_is_shutdown(inode)) { if (ci->i_wrbuffer_ref > 0) { - pr_warn_ratelimited( - "writepage_start %p %lld forced umount\n", - inode, ceph_ino(inode)); + pr_warn_ratelimited_client(cl, + "%llx.%llx %lld forced umount\n", + ceph_vinop(inode), ceph_ino(inode)); } mapping_set_error(mapping, -EIO); return -EIO; /* we're in a forced umount, don't write! */ @@ -976,11 +992,11 @@ retry: if (!snapc) { /* hmm, why does writepages get called when there is no dirty data? */ - dout(" no snap context with dirty data?\n"); + doutc(cl, " no snap context with dirty data?\n"); goto out; } - dout(" oldest snapc is %p seq %lld (%d snaps)\n", - snapc, snapc->seq, snapc->num_snaps); + doutc(cl, " oldest snapc is %p seq %lld (%d snaps)\n", snapc, + snapc->seq, snapc->num_snaps); should_loop = false; if (ceph_wbc.head_snapc && snapc != last_snapc) { @@ -990,13 +1006,13 @@ retry: end = -1; if (index > 0) should_loop = true; - dout(" cyclic, start at %lu\n", index); + doutc(cl, " cyclic, start at %lu\n", index); } else { index = wbc->range_start >> PAGE_SHIFT; end = wbc->range_end >> PAGE_SHIFT; if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = true; - dout(" not cyclic, %lu to %lu\n", index, end); + doutc(cl, " not cyclic, %lu to %lu\n", index, end); } } else if (!ceph_wbc.head_snapc) { /* Do not respect wbc->range_{start,end}. Dirty pages @@ -1005,7 +1021,7 @@ retry: * associated with 'snapc' get written */ if (index > 0) should_loop = true; - dout(" non-head snapc, range whole\n"); + doutc(cl, " non-head snapc, range whole\n"); } if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) @@ -1028,12 +1044,12 @@ retry: get_more_pages: nr_folios = filemap_get_folios_tag(mapping, &index, end, tag, &fbatch); - dout("pagevec_lookup_range_tag got %d\n", nr_folios); + doutc(cl, "pagevec_lookup_range_tag got %d\n", nr_folios); if (!nr_folios && !locked_pages) break; for (i = 0; i < nr_folios && locked_pages < max_pages; i++) { page = &fbatch.folios[i]->page; - dout("? 
%p idx %lu\n", page, page->index); + doutc(cl, "? %p idx %lu\n", page, page->index); if (locked_pages == 0) lock_page(page); /* first page */ else if (!trylock_page(page)) @@ -1042,15 +1058,15 @@ get_more_pages: /* only dirty pages, or our accounting breaks */ if (unlikely(!PageDirty(page)) || unlikely(page->mapping != mapping)) { - dout("!dirty or !mapping %p\n", page); + doutc(cl, "!dirty or !mapping %p\n", page); unlock_page(page); continue; } /* only if matching snap context */ pgsnapc = page_snap_context(page); if (pgsnapc != snapc) { - dout("page snapc %p %lld != oldest %p %lld\n", - pgsnapc, pgsnapc->seq, snapc, snapc->seq); + doutc(cl, "page snapc %p %lld != oldest %p %lld\n", + pgsnapc, pgsnapc->seq, snapc, snapc->seq); if (!should_loop && !ceph_wbc.head_snapc && wbc->sync_mode != WB_SYNC_NONE) @@ -1061,8 +1077,8 @@ get_more_pages: if (page_offset(page) >= ceph_wbc.i_size) { struct folio *folio = page_folio(page); - dout("folio at %lu beyond eof %llu\n", - folio->index, ceph_wbc.i_size); + doutc(cl, "folio at %lu beyond eof %llu\n", + folio->index, ceph_wbc.i_size); if ((ceph_wbc.size_stable || folio_pos(folio) >= i_size_read(inode)) && folio_clear_dirty_for_io(folio)) @@ -1072,23 +1088,23 @@ get_more_pages: continue; } if (strip_unit_end && (page->index > strip_unit_end)) { - dout("end of strip unit %p\n", page); + doutc(cl, "end of strip unit %p\n", page); unlock_page(page); break; } if (PageWriteback(page) || PageFsCache(page)) { if (wbc->sync_mode == WB_SYNC_NONE) { - dout("%p under writeback\n", page); + doutc(cl, "%p under writeback\n", page); unlock_page(page); continue; } - dout("waiting on writeback %p\n", page); + doutc(cl, "waiting on writeback %p\n", page); wait_on_page_writeback(page); wait_on_page_fscache(page); } if (!clear_page_dirty_for_io(page)) { - dout("%p !clear_page_dirty_for_io\n", page); + doutc(cl, "%p !clear_page_dirty_for_io\n", page); unlock_page(page); continue; } @@ -1143,8 +1159,8 @@ get_more_pages: } /* note position of first page in fbatch */ - dout("%p will write page %p idx %lu\n", - inode, page, page->index); + doutc(cl, "%llx.%llx will write page %p idx %lu\n", + ceph_vinop(inode), page, page->index); if (atomic_long_inc_return(&fsc->writeback_count) > CONGESTION_ON_THRESH( @@ -1158,8 +1174,9 @@ get_more_pages: locked_pages ? GFP_NOWAIT : GFP_NOFS); if (IS_ERR(pages[locked_pages])) { if (PTR_ERR(pages[locked_pages]) == -EINVAL) - pr_err("%s: inode->i_blkbits=%hhu\n", - __func__, inode->i_blkbits); + pr_err_client(cl, + "inode->i_blkbits=%hhu\n", + inode->i_blkbits); /* better not fail on first page! 
*/ BUG_ON(locked_pages == 0); pages[locked_pages] = NULL; @@ -1193,7 +1210,7 @@ get_more_pages: if (nr_folios && i == nr_folios && locked_pages < max_pages) { - dout("reached end fbatch, trying for more\n"); + doutc(cl, "reached end fbatch, trying for more\n"); folio_batch_release(&fbatch); goto get_more_pages; } @@ -1254,8 +1271,8 @@ new_request: /* Start a new extent */ osd_req_op_extent_dup_last(req, op_idx, cur_offset - offset); - dout("writepages got pages at %llu~%llu\n", - offset, len); + doutc(cl, "got pages at %llu~%llu\n", offset, + len); osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len, 0, from_pool, false); @@ -1288,12 +1305,13 @@ new_request: if (IS_ENCRYPTED(inode)) len = round_up(len, CEPH_FSCRYPT_BLOCK_SIZE); - dout("writepages got pages at %llu~%llu\n", offset, len); + doutc(cl, "got pages at %llu~%llu\n", offset, len); if (IS_ENCRYPTED(inode) && ((offset | len) & ~CEPH_FSCRYPT_BLOCK_MASK)) - pr_warn("%s: bad encrypted write offset=%lld len=%llu\n", - __func__, offset, len); + pr_warn_client(cl, + "bad encrypted write offset=%lld len=%llu\n", + offset, len); osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len, 0, from_pool, false); @@ -1345,14 +1363,14 @@ new_request: done = true; release_folios: - dout("folio_batch release on %d folios (%p)\n", (int)fbatch.nr, - fbatch.nr ? fbatch.folios[0] : NULL); + doutc(cl, "folio_batch release on %d folios (%p)\n", + (int)fbatch.nr, fbatch.nr ? fbatch.folios[0] : NULL); folio_batch_release(&fbatch); } if (should_loop && !done) { /* more to do; loop back to beginning of file */ - dout("writepages looping back to beginning of file\n"); + doutc(cl, "looping back to beginning of file\n"); end = start_index - 1; /* OK even when start_index == 0 */ /* to write dirty pages associated with next snapc, @@ -1390,7 +1408,8 @@ release_folios: out: ceph_osdc_put_request(req); ceph_put_snap_context(last_snapc); - dout("writepages dend - startone, rc = %d\n", rc); + doutc(cl, "%llx.%llx dend - startone, rc = %d\n", ceph_vinop(inode), + rc); return rc; } @@ -1424,11 +1443,12 @@ static struct ceph_snap_context * ceph_find_incompatible(struct page *page) { struct inode *inode = page->mapping->host; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); if (ceph_inode_is_shutdown(inode)) { - dout(" page %p %llx:%llx is shutdown\n", page, - ceph_vinop(inode)); + doutc(cl, " %llx.%llx page %p is shutdown\n", + ceph_vinop(inode), page); return ERR_PTR(-ESTALE); } @@ -1449,13 +1469,15 @@ ceph_find_incompatible(struct page *page) if (snapc->seq > oldest->seq) { /* not writeable -- return it for the caller to deal with */ ceph_put_snap_context(oldest); - dout(" page %p snapc %p not current or oldest\n", page, snapc); + doutc(cl, " %llx.%llx page %p snapc %p not current or oldest\n", + ceph_vinop(inode), page, snapc); return ceph_get_snap_context(snapc); } ceph_put_snap_context(oldest); /* yay, writeable, do it now (without dropping page lock) */ - dout(" page %p snapc %p not current, but oldest\n", page, snapc); + doutc(cl, " %llx.%llx page %p snapc %p not current, but oldest\n", + ceph_vinop(inode), page, snapc); if (clear_page_dirty_for_io(page)) { int r = writepage_nounlock(page, NULL); if (r < 0) @@ -1524,10 +1546,11 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, { struct folio *folio = page_folio(subpage); struct inode *inode = file_inode(file); + struct ceph_client *cl = ceph_inode_to_client(inode); bool check_cap = false; - dout("write_end file %p 
inode %p folio %p %d~%d (%d)\n", file, - inode, folio, (int)pos, (int)copied, (int)len); + doutc(cl, "%llx.%llx file %p folio %p %d~%d (%d)\n", ceph_vinop(inode), + file, folio, (int)pos, (int)copied, (int)len); if (!folio_test_uptodate(folio)) { /* just return that nothing was copied on a short copy */ @@ -1587,6 +1610,7 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; struct inode *inode = file_inode(vma->vm_file); struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_file_info *fi = vma->vm_file->private_data; loff_t off = (loff_t)vmf->pgoff << PAGE_SHIFT; int want, got, err; @@ -1598,8 +1622,8 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) ceph_block_sigs(&oldset); - dout("filemap_fault %p %llx.%llx %llu trying to get caps\n", - inode, ceph_vinop(inode), off); + doutc(cl, "%llx.%llx %llu trying to get caps\n", + ceph_vinop(inode), off); if (fi->fmode & CEPH_FILE_MODE_LAZY) want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; else @@ -1610,8 +1634,8 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) if (err < 0) goto out_restore; - dout("filemap_fault %p %llu got cap refs on %s\n", - inode, off, ceph_cap_string(got)); + doutc(cl, "%llx.%llx %llu got cap refs on %s\n", ceph_vinop(inode), + off, ceph_cap_string(got)); if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) || !ceph_has_inline_data(ci)) { @@ -1619,8 +1643,8 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) ceph_add_rw_context(fi, &rw_ctx); ret = filemap_fault(vmf); ceph_del_rw_context(fi, &rw_ctx); - dout("filemap_fault %p %llu drop cap refs %s ret %x\n", - inode, off, ceph_cap_string(got), ret); + doutc(cl, "%llx.%llx %llu drop cap refs %s ret %x\n", + ceph_vinop(inode), off, ceph_cap_string(got), ret); } else err = -EAGAIN; @@ -1661,8 +1685,8 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED; out_inline: filemap_invalidate_unlock_shared(mapping); - dout("filemap_fault %p %llu read inline data ret %x\n", - inode, off, ret); + doutc(cl, "%llx.%llx %llu read inline data ret %x\n", + ceph_vinop(inode), off, ret); } out_restore: ceph_restore_sigs(&oldset); @@ -1676,6 +1700,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct inode *inode = file_inode(vma->vm_file); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi = vma->vm_file->private_data; struct ceph_cap_flush *prealloc_cf; @@ -1702,8 +1727,8 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) else len = offset_in_thp(page, size); - dout("page_mkwrite %p %llx.%llx %llu~%zd getting caps i_size %llu\n", - inode, ceph_vinop(inode), off, len, size); + doutc(cl, "%llx.%llx %llu~%zd getting caps i_size %llu\n", + ceph_vinop(inode), off, len, size); if (fi->fmode & CEPH_FILE_MODE_LAZY) want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; else @@ -1714,8 +1739,8 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) if (err < 0) goto out_free; - dout("page_mkwrite %p %llu~%zd got cap refs on %s\n", - inode, off, len, ceph_cap_string(got)); + doutc(cl, "%llx.%llx %llu~%zd got cap refs on %s\n", ceph_vinop(inode), + off, len, ceph_cap_string(got)); /* Update time before taking page lock */ file_update_time(vma->vm_file); @@ -1763,8 +1788,8 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) __mark_inode_dirty(inode, dirty); 
} - dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %x\n", - inode, off, len, ceph_cap_string(got), ret); + doutc(cl, "%llx.%llx %llu~%zd dropping cap refs on %s ret %x\n", + ceph_vinop(inode), off, len, ceph_cap_string(got), ret); ceph_put_cap_refs_async(ci, got); out_free: ceph_restore_sigs(&oldset); @@ -1778,6 +1803,7 @@ out_free: void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, char *data, size_t len) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct address_space *mapping = inode->i_mapping; struct page *page; @@ -1798,8 +1824,8 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, } } - dout("fill_inline_data %p %llx.%llx len %zu locked_page %p\n", - inode, ceph_vinop(inode), len, locked_page); + doutc(cl, "%p %llx.%llx len %zu locked_page %p\n", inode, + ceph_vinop(inode), len, locked_page); if (len > 0) { void *kaddr = kmap_atomic(page); @@ -1823,7 +1849,8 @@ int ceph_uninline_data(struct file *file) { struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_osd_request *req = NULL; struct ceph_cap_flush *prealloc_cf = NULL; struct folio *folio = NULL; @@ -1836,8 +1863,8 @@ int ceph_uninline_data(struct file *file) inline_version = ci->i_inline_version; spin_unlock(&ci->i_ceph_lock); - dout("uninline_data %p %llx.%llx inline_version %llu\n", - inode, ceph_vinop(inode), inline_version); + doutc(cl, "%llx.%llx inline_version %llu\n", ceph_vinop(inode), + inline_version); if (ceph_inode_is_shutdown(inode)) { err = -EIO; @@ -1949,8 +1976,8 @@ out_unlock: } out: ceph_free_cap_flush(prealloc_cf); - dout("uninline_data %p %llx.%llx inline_version %llu = %d\n", - inode, ceph_vinop(inode), inline_version, err); + doutc(cl, "%llx.%llx inline_version %llu = %d\n", + ceph_vinop(inode), inline_version, err); return err; } @@ -1977,8 +2004,9 @@ enum { static int __ceph_pool_perm_get(struct ceph_inode_info *ci, s64 pool, struct ceph_string *pool_ns) { - struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->netfs.inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(&ci->netfs.inode); struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *cl = fsc->client; struct ceph_osd_request *rd_req = NULL, *wr_req = NULL; struct rb_node **p, *parent; struct ceph_pool_perm *perm; @@ -2013,10 +2041,10 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, goto out; if (pool_ns) - dout("__ceph_pool_perm_get pool %lld ns %.*s no perm cached\n", - pool, (int)pool_ns->len, pool_ns->str); + doutc(cl, "pool %lld ns %.*s no perm cached\n", pool, + (int)pool_ns->len, pool_ns->str); else - dout("__ceph_pool_perm_get pool %lld no perm cached\n", pool); + doutc(cl, "pool %lld no perm cached\n", pool); down_write(&mdsc->pool_perm_rwsem); p = &mdsc->pool_perm_tree.rb_node; @@ -2141,15 +2169,16 @@ out: if (!err) err = have; if (pool_ns) - dout("__ceph_pool_perm_get pool %lld ns %.*s result = %d\n", - pool, (int)pool_ns->len, pool_ns->str, err); + doutc(cl, "pool %lld ns %.*s result = %d\n", pool, + (int)pool_ns->len, pool_ns->str, err); else - dout("__ceph_pool_perm_get pool %lld result = %d\n", pool, err); + doutc(cl, "pool %lld result = %d\n", pool, err); return err; } int ceph_pool_perm_check(struct inode *inode, int need) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = 
ceph_inode(inode); struct ceph_string *pool_ns; s64 pool; @@ -2168,7 +2197,7 @@ int ceph_pool_perm_check(struct inode *inode, int need) return 0; } - if (ceph_test_mount_opt(ceph_inode_to_client(inode), + if (ceph_test_mount_opt(ceph_inode_to_fs_client(inode), NOPOOLPERM)) return 0; @@ -2179,13 +2208,11 @@ int ceph_pool_perm_check(struct inode *inode, int need) check: if (flags & CEPH_I_POOL_PERM) { if ((need & CEPH_CAP_FILE_RD) && !(flags & CEPH_I_POOL_RD)) { - dout("ceph_pool_perm_check pool %lld no read perm\n", - pool); + doutc(cl, "pool %lld no read perm\n", pool); return -EPERM; } if ((need & CEPH_CAP_FILE_WR) && !(flags & CEPH_I_POOL_WR)) { - dout("ceph_pool_perm_check pool %lld no write perm\n", - pool); + doutc(cl, "pool %lld no write perm\n", pool); return -EPERM; } return 0; diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index de1dee46d3df..930fbd54d2c8 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -15,7 +15,7 @@ void ceph_fscache_register_inode_cookie(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); /* No caching for filesystem? */ if (!fsc->fscache) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index a104669fcf4c..2c0b8dc3dd0d 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -186,10 +186,10 @@ static void __ceph_unreserve_caps(struct ceph_mds_client *mdsc, int nr_caps) mdsc->caps_avail_count += nr_caps; } - dout("%s: caps %d = %d used + %d resv + %d avail\n", - __func__, - mdsc->caps_total_count, mdsc->caps_use_count, - mdsc->caps_reserve_count, mdsc->caps_avail_count); + doutc(mdsc->fsc->client, + "caps %d = %d used + %d resv + %d avail\n", + mdsc->caps_total_count, mdsc->caps_use_count, + mdsc->caps_reserve_count, mdsc->caps_avail_count); BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + mdsc->caps_reserve_count + mdsc->caps_avail_count); @@ -202,6 +202,7 @@ static void __ceph_unreserve_caps(struct ceph_mds_client *mdsc, int nr_caps) int ceph_reserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx, int need) { + struct ceph_client *cl = mdsc->fsc->client; int i, j; struct ceph_cap *cap; int have; @@ -212,7 +213,7 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *s; LIST_HEAD(newcaps); - dout("reserve caps ctx=%p need=%d\n", ctx, need); + doutc(cl, "ctx=%p need=%d\n", ctx, need); /* first reserve any caps that are already allocated */ spin_lock(&mdsc->caps_list_lock); @@ -272,8 +273,8 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, continue; } - pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n", - ctx, need, have + alloc); + pr_warn_client(cl, "ctx=%p ENOMEM need=%d got=%d\n", ctx, need, + have + alloc); err = -ENOMEM; break; } @@ -298,20 +299,21 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, spin_unlock(&mdsc->caps_list_lock); - dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n", - ctx, mdsc->caps_total_count, mdsc->caps_use_count, - mdsc->caps_reserve_count, mdsc->caps_avail_count); + doutc(cl, "ctx=%p %d = %d used + %d resv + %d avail\n", ctx, + mdsc->caps_total_count, mdsc->caps_use_count, + mdsc->caps_reserve_count, mdsc->caps_avail_count); return err; } void ceph_unreserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx) { + struct ceph_client *cl = mdsc->fsc->client; bool reclaim = false; if (!ctx->count) return; - dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count); + doutc(cl, "ctx=%p count=%d\n", ctx, 
ctx->count); spin_lock(&mdsc->caps_list_lock); __ceph_unreserve_caps(mdsc, ctx->count); ctx->count = 0; @@ -328,6 +330,7 @@ void ceph_unreserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap *cap = NULL; /* temporary, until we do something about cap import/export */ @@ -359,9 +362,9 @@ struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, } spin_lock(&mdsc->caps_list_lock); - dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n", - ctx, ctx->count, mdsc->caps_total_count, mdsc->caps_use_count, - mdsc->caps_reserve_count, mdsc->caps_avail_count); + doutc(cl, "ctx=%p (%d) %d = %d used + %d resv + %d avail\n", ctx, + ctx->count, mdsc->caps_total_count, mdsc->caps_use_count, + mdsc->caps_reserve_count, mdsc->caps_avail_count); BUG_ON(!ctx->count); BUG_ON(ctx->count > mdsc->caps_reserve_count); BUG_ON(list_empty(&mdsc->caps_list)); @@ -382,10 +385,12 @@ struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap) { + struct ceph_client *cl = mdsc->fsc->client; + spin_lock(&mdsc->caps_list_lock); - dout("put_cap %p %d = %d used + %d resv + %d avail\n", - cap, mdsc->caps_total_count, mdsc->caps_use_count, - mdsc->caps_reserve_count, mdsc->caps_avail_count); + doutc(cl, "%p %d = %d used + %d resv + %d avail\n", cap, + mdsc->caps_total_count, mdsc->caps_use_count, + mdsc->caps_reserve_count, mdsc->caps_avail_count); mdsc->caps_use_count--; /* * Keep some preallocated caps around (ceph_min_count), to @@ -491,11 +496,13 @@ static void __insert_cap_node(struct ceph_inode_info *ci, static void __cap_set_timeouts(struct ceph_mds_client *mdsc, struct ceph_inode_info *ci) { + struct inode *inode = &ci->netfs.inode; struct ceph_mount_options *opt = mdsc->fsc->mount_options; + ci->i_hold_caps_max = round_jiffies(jiffies + opt->caps_wanted_delay_max * HZ); - dout("__cap_set_timeouts %p %lu\n", &ci->netfs.inode, - ci->i_hold_caps_max - jiffies); + doutc(mdsc->fsc->client, "%p %llx.%llx %lu\n", inode, + ceph_vinop(inode), ci->i_hold_caps_max - jiffies); } /* @@ -509,8 +516,11 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc, static void __cap_delay_requeue(struct ceph_mds_client *mdsc, struct ceph_inode_info *ci) { - dout("__cap_delay_requeue %p flags 0x%lx at %lu\n", &ci->netfs.inode, - ci->i_ceph_flags, ci->i_hold_caps_max); + struct inode *inode = &ci->netfs.inode; + + doutc(mdsc->fsc->client, "%p %llx.%llx flags 0x%lx at %lu\n", + inode, ceph_vinop(inode), ci->i_ceph_flags, + ci->i_hold_caps_max); if (!mdsc->stopping) { spin_lock(&mdsc->cap_delay_lock); if (!list_empty(&ci->i_cap_delay_list)) { @@ -533,7 +543,9 @@ no_change: static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc, struct ceph_inode_info *ci) { - dout("__cap_delay_requeue_front %p\n", &ci->netfs.inode); + struct inode *inode = &ci->netfs.inode; + + doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode, ceph_vinop(inode)); spin_lock(&mdsc->cap_delay_lock); ci->i_ceph_flags |= CEPH_I_FLUSH; if (!list_empty(&ci->i_cap_delay_list)) @@ -550,7 +562,9 @@ static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc, static void __cap_delay_cancel(struct ceph_mds_client *mdsc, struct ceph_inode_info *ci) { - dout("__cap_delay_cancel %p\n", &ci->netfs.inode); + struct inode *inode = &ci->netfs.inode; + + doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode, ceph_vinop(inode)); if 
(list_empty(&ci->i_cap_delay_list)) return; spin_lock(&mdsc->cap_delay_lock); @@ -562,6 +576,9 @@ static void __cap_delay_cancel(struct ceph_mds_client *mdsc, static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap, unsigned issued) { + struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); + unsigned had = __ceph_caps_issued(ci, NULL); lockdep_assert_held(&ci->i_ceph_lock); @@ -586,7 +603,7 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap, if (issued & CEPH_CAP_FILE_SHARED) atomic_inc(&ci->i_shared_gen); if (S_ISDIR(ci->netfs.inode.i_mode)) { - dout(" marking %p NOT complete\n", &ci->netfs.inode); + doutc(cl, " marking %p NOT complete\n", inode); __ceph_dir_clear_complete(ci); } } @@ -635,7 +652,8 @@ void ceph_add_cap(struct inode *inode, unsigned seq, unsigned mseq, u64 realmino, int flags, struct ceph_cap **new_cap) { - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_cap *cap; int mds = session->s_mds; @@ -644,8 +662,9 @@ void ceph_add_cap(struct inode *inode, lockdep_assert_held(&ci->i_ceph_lock); - dout("add_cap %p mds%d cap %llx %s seq %d\n", inode, - session->s_mds, cap_id, ceph_cap_string(issued), seq); + doutc(cl, "%p %llx.%llx mds%d cap %llx %s seq %d\n", inode, + ceph_vinop(inode), session->s_mds, cap_id, + ceph_cap_string(issued), seq); gen = atomic_read(&session->s_cap_gen); @@ -723,9 +742,9 @@ void ceph_add_cap(struct inode *inode, actual_wanted = __ceph_caps_wanted(ci); if ((wanted & ~actual_wanted) || (issued & ~actual_wanted & CEPH_CAP_ANY_WR)) { - dout(" issued %s, mds wanted %s, actual %s, queueing\n", - ceph_cap_string(issued), ceph_cap_string(wanted), - ceph_cap_string(actual_wanted)); + doutc(cl, "issued %s, mds wanted %s, actual %s, queueing\n", + ceph_cap_string(issued), ceph_cap_string(wanted), + ceph_cap_string(actual_wanted)); __cap_delay_requeue(mdsc, ci); } @@ -742,9 +761,9 @@ void ceph_add_cap(struct inode *inode, WARN_ON(ci->i_auth_cap == cap); } - dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n", - inode, ceph_vinop(inode), cap, ceph_cap_string(issued), - ceph_cap_string(issued|cap->issued), seq, mds); + doutc(cl, "inode %p %llx.%llx cap %p %s now %s seq %d mds%d\n", + inode, ceph_vinop(inode), cap, ceph_cap_string(issued), + ceph_cap_string(issued|cap->issued), seq, mds); cap->cap_id = cap_id; cap->issued = issued; cap->implemented |= issued; @@ -766,6 +785,8 @@ void ceph_add_cap(struct inode *inode, */ static int __cap_is_valid(struct ceph_cap *cap) { + struct inode *inode = &cap->ci->netfs.inode; + struct ceph_client *cl = cap->session->s_mdsc->fsc->client; unsigned long ttl; u32 gen; @@ -773,9 +794,9 @@ static int __cap_is_valid(struct ceph_cap *cap) ttl = cap->session->s_cap_ttl; if (cap->cap_gen < gen || time_after_eq(jiffies, ttl)) { - dout("__cap_is_valid %p cap %p issued %s " - "but STALE (gen %u vs %u)\n", &cap->ci->netfs.inode, - cap, ceph_cap_string(cap->issued), cap->cap_gen, gen); + doutc(cl, "%p %llx.%llx cap %p issued %s but STALE (gen %u vs %u)\n", + inode, ceph_vinop(inode), cap, + ceph_cap_string(cap->issued), cap->cap_gen, gen); return 0; } @@ -789,6 +810,8 @@ static int __cap_is_valid(struct ceph_cap *cap) */ int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented) { + struct inode *inode = &ci->netfs.inode; + 
struct ceph_client *cl = ceph_inode_to_client(inode); int have = ci->i_snap_caps; struct ceph_cap *cap; struct rb_node *p; @@ -799,8 +822,8 @@ int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented) cap = rb_entry(p, struct ceph_cap, ci_node); if (!__cap_is_valid(cap)) continue; - dout("__ceph_caps_issued %p cap %p issued %s\n", - &ci->netfs.inode, cap, ceph_cap_string(cap->issued)); + doutc(cl, "%p %llx.%llx cap %p issued %s\n", inode, + ceph_vinop(inode), cap, ceph_cap_string(cap->issued)); have |= cap->issued; if (implemented) *implemented |= cap->implemented; @@ -843,16 +866,18 @@ int __ceph_caps_issued_other(struct ceph_inode_info *ci, struct ceph_cap *ocap) */ static void __touch_cap(struct ceph_cap *cap) { + struct inode *inode = &cap->ci->netfs.inode; struct ceph_mds_session *s = cap->session; + struct ceph_client *cl = s->s_mdsc->fsc->client; spin_lock(&s->s_cap_lock); if (!s->s_cap_iterator) { - dout("__touch_cap %p cap %p mds%d\n", &cap->ci->netfs.inode, cap, - s->s_mds); + doutc(cl, "%p %llx.%llx cap %p mds%d\n", inode, + ceph_vinop(inode), cap, s->s_mds); list_move_tail(&cap->session_caps, &s->s_caps); } else { - dout("__touch_cap %p cap %p mds%d NOP, iterating over caps\n", - &cap->ci->netfs.inode, cap, s->s_mds); + doutc(cl, "%p %llx.%llx cap %p mds%d NOP, iterating over caps\n", + inode, ceph_vinop(inode), cap, s->s_mds); } spin_unlock(&s->s_cap_lock); } @@ -864,15 +889,16 @@ static void __touch_cap(struct ceph_cap *cap) */ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) { + struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_cap *cap; struct rb_node *p; int have = ci->i_snap_caps; if ((have & mask) == mask) { - dout("__ceph_caps_issued_mask ino 0x%llx snap issued %s" - " (mask %s)\n", ceph_ino(&ci->netfs.inode), - ceph_cap_string(have), - ceph_cap_string(mask)); + doutc(cl, "mask %p %llx.%llx snap issued %s (mask %s)\n", + inode, ceph_vinop(inode), ceph_cap_string(have), + ceph_cap_string(mask)); return 1; } @@ -881,10 +907,10 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) if (!__cap_is_valid(cap)) continue; if ((cap->issued & mask) == mask) { - dout("__ceph_caps_issued_mask ino 0x%llx cap %p issued %s" - " (mask %s)\n", ceph_ino(&ci->netfs.inode), cap, - ceph_cap_string(cap->issued), - ceph_cap_string(mask)); + doutc(cl, "mask %p %llx.%llx cap %p issued %s (mask %s)\n", + inode, ceph_vinop(inode), cap, + ceph_cap_string(cap->issued), + ceph_cap_string(mask)); if (touch) __touch_cap(cap); return 1; @@ -893,10 +919,10 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) /* does a combination of caps satisfy mask? 
*/ have |= cap->issued; if ((have & mask) == mask) { - dout("__ceph_caps_issued_mask ino 0x%llx combo issued %s" - " (mask %s)\n", ceph_ino(&ci->netfs.inode), - ceph_cap_string(cap->issued), - ceph_cap_string(mask)); + doutc(cl, "mask %p %llx.%llx combo issued %s (mask %s)\n", + inode, ceph_vinop(inode), + ceph_cap_string(cap->issued), + ceph_cap_string(mask)); if (touch) { struct rb_node *q; @@ -922,7 +948,7 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) int __ceph_caps_issued_mask_metric(struct ceph_inode_info *ci, int mask, int touch) { - struct ceph_fs_client *fsc = ceph_sb_to_client(ci->netfs.inode.i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(ci->netfs.inode.i_sb); int r; r = __ceph_caps_issued_mask(ci, mask, touch); @@ -954,13 +980,14 @@ int __ceph_caps_revoking_other(struct ceph_inode_info *ci, int ceph_caps_revoking(struct ceph_inode_info *ci, int mask) { struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); int ret; spin_lock(&ci->i_ceph_lock); ret = __ceph_caps_revoking_other(ci, NULL, mask); spin_unlock(&ci->i_ceph_lock); - dout("ceph_caps_revoking %p %s = %d\n", inode, - ceph_cap_string(mask), ret); + doutc(cl, "%p %llx.%llx %s = %d\n", inode, ceph_vinop(inode), + ceph_cap_string(mask), ret); return ret; } @@ -996,7 +1023,7 @@ int __ceph_caps_file_wanted(struct ceph_inode_info *ci) const int WR_SHIFT = ffs(CEPH_FILE_MODE_WR); const int LAZY_SHIFT = ffs(CEPH_FILE_MODE_LAZY); struct ceph_mount_options *opt = - ceph_inode_to_client(&ci->netfs.inode)->mount_options; + ceph_inode_to_fs_client(&ci->netfs.inode)->mount_options; unsigned long used_cutoff = jiffies - opt->caps_wanted_delay_max * HZ; unsigned long idle_cutoff = jiffies - opt->caps_wanted_delay_min * HZ; @@ -1107,21 +1134,23 @@ int ceph_is_any_caps(struct inode *inode) void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) { struct ceph_mds_session *session = cap->session; + struct ceph_client *cl = session->s_mdsc->fsc->client; struct ceph_inode_info *ci = cap->ci; + struct inode *inode = &ci->netfs.inode; struct ceph_mds_client *mdsc; int removed = 0; /* 'ci' being NULL means the remove has already occurred */ if (!ci) { - dout("%s: cap inode is NULL\n", __func__); + doutc(cl, "inode is NULL\n"); return; } lockdep_assert_held(&ci->i_ceph_lock); - dout("__ceph_remove_cap %p from %p\n", cap, &ci->netfs.inode); + doutc(cl, "%p from %p %llx.%llx\n", cap, inode, ceph_vinop(inode)); - mdsc = ceph_inode_to_client(&ci->netfs.inode)->mdsc; + mdsc = ceph_inode_to_fs_client(&ci->netfs.inode)->mdsc; /* remove from inode's cap rbtree, and clear auth cap */ rb_erase(&cap->ci_node, &ci->i_caps); @@ -1132,8 +1161,8 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) spin_lock(&session->s_cap_lock); if (session->s_cap_iterator == cap) { /* not yet, we are iterating over this very cap */ - dout("__ceph_remove_cap delaying %p removal from session %p\n", - cap, cap->session); + doutc(cl, "delaying %p removal from session %p\n", cap, + cap->session); } else { list_del_init(&cap->session_caps); session->s_nr_caps--; @@ -1178,20 +1207,21 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) } } -void ceph_remove_cap(struct ceph_cap *cap, bool queue_release) +void ceph_remove_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, + bool queue_release) { struct ceph_inode_info *ci = cap->ci; struct ceph_fs_client *fsc; /* 'ci' being NULL means the remove has already occurred */ if (!ci) { - dout("%s: cap inode is
NULL\n", __func__); + doutc(mdsc->fsc->client, "inode is NULL\n"); return; } lockdep_assert_held(&ci->i_ceph_lock); - fsc = ceph_inode_to_client(&ci->netfs.inode); + fsc = ceph_inode_to_fs_client(&ci->netfs.inode); WARN_ON_ONCE(ci->i_auth_cap == cap && !list_empty(&ci->i_dirty_item) && !fsc->blocklisted && @@ -1227,15 +1257,19 @@ static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg) { struct ceph_mds_caps *fc; void *p; - struct ceph_osd_client *osdc = &arg->session->s_mdsc->fsc->client->osdc; - - dout("%s %s %llx %llx caps %s wanted %s dirty %s seq %u/%u tid %llu/%llu mseq %u follows %lld size %llu/%llu xattr_ver %llu xattr_len %d\n", - __func__, ceph_cap_op_name(arg->op), arg->cid, arg->ino, - ceph_cap_string(arg->caps), ceph_cap_string(arg->wanted), - ceph_cap_string(arg->dirty), arg->seq, arg->issue_seq, - arg->flush_tid, arg->oldest_flush_tid, arg->mseq, arg->follows, - arg->size, arg->max_size, arg->xattr_version, - arg->xattr_buf ? (int)arg->xattr_buf->vec.iov_len : 0); + struct ceph_mds_client *mdsc = arg->session->s_mdsc; + struct ceph_osd_client *osdc = &mdsc->fsc->client->osdc; + + doutc(mdsc->fsc->client, + "%s %llx %llx caps %s wanted %s dirty %s seq %u/%u" + " tid %llu/%llu mseq %u follows %lld size %llu/%llu" + " xattr_ver %llu xattr_len %d\n", + ceph_cap_op_name(arg->op), arg->cid, arg->ino, + ceph_cap_string(arg->caps), ceph_cap_string(arg->wanted), + ceph_cap_string(arg->dirty), arg->seq, arg->issue_seq, + arg->flush_tid, arg->oldest_flush_tid, arg->mseq, arg->follows, + arg->size, arg->max_size, arg->xattr_version, + arg->xattr_buf ? (int)arg->xattr_buf->vec.iov_len : 0); msg->hdr.version = cpu_to_le16(12); msg->hdr.tid = cpu_to_le64(arg->flush_tid); @@ -1342,6 +1376,8 @@ static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg) */ void __ceph_remove_caps(struct ceph_inode_info *ci) { + struct inode *inode = &ci->netfs.inode; + struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; struct rb_node *p; /* lock i_ceph_lock, because ceph_d_revalidate(..., LOOKUP_RCU) @@ -1351,7 +1387,7 @@ void __ceph_remove_caps(struct ceph_inode_info *ci) while (p) { struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node); p = rb_next(p); - ceph_remove_cap(cap, true); + ceph_remove_cap(mdsc, cap, true); } spin_unlock(&ci->i_ceph_lock); } @@ -1370,6 +1406,7 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap, { struct ceph_inode_info *ci = cap->ci; struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); int held, revoking; lockdep_assert_held(&ci->i_ceph_lock); @@ -1378,10 +1415,10 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap, revoking = cap->implemented & ~cap->issued; retain &= ~revoking; - dout("%s %p cap %p session %p %s -> %s (revoking %s)\n", - __func__, inode, cap, cap->session, - ceph_cap_string(held), ceph_cap_string(held & retain), - ceph_cap_string(revoking)); + doutc(cl, "%p %llx.%llx cap %p session %p %s -> %s (revoking %s)\n", + inode, ceph_vinop(inode), cap, cap->session, + ceph_cap_string(held), ceph_cap_string(held & retain), + ceph_cap_string(revoking)); BUG_ON((retain & CEPH_CAP_PIN) == 0); ci->i_ceph_flags &= ~CEPH_I_FLUSH; @@ -1497,13 +1534,16 @@ static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci) { struct ceph_msg *msg; struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, cap_msg_size(arg), GFP_NOFS, false); if (!msg) { 
- pr_err("error allocating cap msg: ino (%llx.%llx) flushing %s tid %llu, requeuing cap.\n", - ceph_vinop(inode), ceph_cap_string(arg->dirty), - arg->flush_tid); + pr_err_client(cl, + "error allocating cap msg: ino (%llx.%llx)" + " flushing %s tid %llu, requeuing cap.\n", + ceph_vinop(inode), ceph_cap_string(arg->dirty), + arg->flush_tid); spin_lock(&ci->i_ceph_lock); __cap_delay_requeue(arg->session->s_mdsc, ci); spin_unlock(&ci->i_ceph_lock); @@ -1592,11 +1632,13 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci, { struct inode *inode = &ci->netfs.inode; struct ceph_mds_client *mdsc = session->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap_snap *capsnap; u64 oldest_flush_tid = 0; u64 first_tid = 1, last_tid = 0; - dout("__flush_snaps %p session %p\n", inode, session); + doutc(cl, "%p %llx.%llx session %p\n", inode, ceph_vinop(inode), + session); list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { /* @@ -1611,7 +1653,7 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci, /* only flush each capsnap once */ if (capsnap->cap_flush.tid > 0) { - dout(" already flushed %p, skipping\n", capsnap); + doutc(cl, "already flushed %p, skipping\n", capsnap); continue; } @@ -1643,8 +1685,8 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci, int ret; if (!(cap && cap->session == session)) { - dout("__flush_snaps %p auth cap %p not mds%d, " - "stop\n", inode, cap, session->s_mds); + doutc(cl, "%p %llx.%llx auth cap %p not mds%d, stop\n", + inode, ceph_vinop(inode), cap, session->s_mds); break; } @@ -1665,15 +1707,17 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci, refcount_inc(&capsnap->nref); spin_unlock(&ci->i_ceph_lock); - dout("__flush_snaps %p capsnap %p tid %llu %s\n", - inode, capsnap, cf->tid, ceph_cap_string(capsnap->dirty)); + doutc(cl, "%p %llx.%llx capsnap %p tid %llu %s\n", inode, + ceph_vinop(inode), capsnap, cf->tid, + ceph_cap_string(capsnap->dirty)); ret = __send_flush_snap(inode, session, capsnap, cap->mseq, oldest_flush_tid); if (ret < 0) { - pr_err("__flush_snaps: error sending cap flushsnap, " - "ino (%llx.%llx) tid %llu follows %llu\n", - ceph_vinop(inode), cf->tid, capsnap->follows); + pr_err_client(cl, "error sending cap flushsnap, " + "ino (%llx.%llx) tid %llu follows %llu\n", + ceph_vinop(inode), cf->tid, + capsnap->follows); } ceph_put_cap_snap(capsnap); @@ -1685,28 +1729,29 @@ void ceph_flush_snaps(struct ceph_inode_info *ci, struct ceph_mds_session **psession) { struct inode *inode = &ci->netfs.inode; - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_mds_session *session = NULL; bool need_put = false; int mds; - dout("ceph_flush_snaps %p\n", inode); + doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode)); if (psession) session = *psession; retry: spin_lock(&ci->i_ceph_lock); if (!(ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)) { - dout(" no capsnap needs flush, doing nothing\n"); + doutc(cl, " no capsnap needs flush, doing nothing\n"); goto out; } if (!ci->i_auth_cap) { - dout(" no auth cap (migrating?), doing nothing\n"); + doutc(cl, " no auth cap (migrating?), doing nothing\n"); goto out; } mds = ci->i_auth_cap->session->s_mds; if (session && session->s_mds != mds) { - dout(" oops, wrong session %p mutex\n", session); + doutc(cl, " oops, wrong session %p mutex\n", session); ceph_put_mds_session(session); session = NULL; } @@ -1750,23 +1795,25 @@ int 
__ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask, struct ceph_cap_flush **pcf) { struct ceph_mds_client *mdsc = - ceph_sb_to_client(ci->netfs.inode.i_sb)->mdsc; + ceph_sb_to_fs_client(ci->netfs.inode.i_sb)->mdsc; struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); int was = ci->i_dirty_caps; int dirty = 0; lockdep_assert_held(&ci->i_ceph_lock); if (!ci->i_auth_cap) { - pr_warn("__mark_dirty_caps %p %llx mask %s, " - "but no auth cap (session was closed?)\n", - inode, ceph_ino(inode), ceph_cap_string(mask)); + pr_warn_client(cl, "%p %llx.%llx mask %s, " + "but no auth cap (session was closed?)\n", + inode, ceph_vinop(inode), + ceph_cap_string(mask)); return 0; } - dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->netfs.inode, - ceph_cap_string(mask), ceph_cap_string(was), - ceph_cap_string(was | mask)); + doutc(cl, "%p %llx.%llx %s dirty %s -> %s\n", inode, + ceph_vinop(inode), ceph_cap_string(mask), + ceph_cap_string(was), ceph_cap_string(was | mask)); ci->i_dirty_caps |= mask; if (was == 0) { struct ceph_mds_session *session = ci->i_auth_cap->session; @@ -1779,8 +1826,9 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask, ci->i_head_snapc = ceph_get_snap_context( ci->i_snap_realm->cached_context); } - dout(" inode %p now dirty snapc %p auth cap %p\n", - &ci->netfs.inode, ci->i_head_snapc, ci->i_auth_cap); + doutc(cl, "%p %llx.%llx now dirty snapc %p auth cap %p\n", + inode, ceph_vinop(inode), ci->i_head_snapc, + ci->i_auth_cap); BUG_ON(!list_empty(&ci->i_dirty_item)); spin_lock(&mdsc->cap_dirty_lock); list_add(&ci->i_dirty_item, &session->s_cap_dirty); @@ -1873,7 +1921,8 @@ static u64 __mark_caps_flushing(struct inode *inode, struct ceph_mds_session *session, bool wake, u64 *oldest_flush_tid) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_cap_flush *cf = NULL; int flushing; @@ -1884,13 +1933,13 @@ static u64 __mark_caps_flushing(struct inode *inode, BUG_ON(!ci->i_prealloc_cap_flush); flushing = ci->i_dirty_caps; - dout("__mark_caps_flushing flushing %s, flushing_caps %s -> %s\n", - ceph_cap_string(flushing), - ceph_cap_string(ci->i_flushing_caps), - ceph_cap_string(ci->i_flushing_caps | flushing)); + doutc(cl, "flushing %s, flushing_caps %s -> %s\n", + ceph_cap_string(flushing), + ceph_cap_string(ci->i_flushing_caps), + ceph_cap_string(ci->i_flushing_caps | flushing)); ci->i_flushing_caps |= flushing; ci->i_dirty_caps = 0; - dout(" inode %p now !dirty\n", inode); + doutc(cl, "%p %llx.%llx now !dirty\n", inode, ceph_vinop(inode)); swap(cf, ci->i_prealloc_cap_flush); cf->caps = flushing; @@ -1921,6 +1970,7 @@ static int try_nonblocking_invalidate(struct inode *inode) __releases(ci->i_ceph_lock) __acquires(ci->i_ceph_lock) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); u32 invalidating_gen = ci->i_rdcache_gen; @@ -1932,12 +1982,13 @@ static int try_nonblocking_invalidate(struct inode *inode) if (inode->i_data.nrpages == 0 && invalidating_gen == ci->i_rdcache_gen) { /* success. 
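__ceph_mark_dirty_caps() above logs the transition "dirty %s -> %s", that is, the old mask and old-mask|new-mask rendered by ceph_cap_string(). A compact sketch of that bookkeeping; the four cap bits and their two-letter tags are invented stand-ins for the kernel's real cap set:

    #include <stdio.h>
    #include <string.h>

    enum { CAP_AUTH_SHARED = 1, CAP_LINK_SHARED = 2,
           CAP_XATTR_SHARED = 4, CAP_FILE_WR = 8 };

    /* crude ceph_cap_string() substitute: render set bits as short tags */
    static const char *cap_string(unsigned caps, char buf[16])
    {
            buf[0] = '\0';
            if (caps & CAP_AUTH_SHARED)  strcat(buf, "As");
            if (caps & CAP_LINK_SHARED)  strcat(buf, "Ls");
            if (caps & CAP_XATTR_SHARED) strcat(buf, "Xs");
            if (caps & CAP_FILE_WR)      strcat(buf, "Fw");
            return buf[0] ? buf : "-";
    }

    int main(void)
    {
            unsigned was  = CAP_AUTH_SHARED;  /* i_dirty_caps before    */
            unsigned mask = CAP_FILE_WR;      /* caps being dirtied now */
            char a[16], b[16];

            printf("dirty %s -> %s\n",
                   cap_string(was, a), cap_string(was | mask, b));
            /* prints: dirty As -> AsFw */
            return 0;
    }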
*/ - dout("try_nonblocking_invalidate %p success\n", inode); + doutc(cl, "%p %llx.%llx success\n", inode, + ceph_vinop(inode)); /* save any racing async invalidate some trouble */ ci->i_rdcache_revoking = ci->i_rdcache_gen - 1; return 0; } - dout("try_nonblocking_invalidate %p failed\n", inode); + doutc(cl, "%p %llx.%llx failed\n", inode, ceph_vinop(inode)); return -1; } @@ -1969,6 +2020,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) { struct inode *inode = &ci->netfs.inode; struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_cap *cap; u64 flush_tid, oldest_flush_tid; int file_wanted, used, cap_used; @@ -2043,9 +2095,9 @@ retry: } } - dout("check_caps %llx.%llx file_want %s used %s dirty %s flushing %s" - " issued %s revoking %s retain %s %s%s%s\n", ceph_vinop(inode), - ceph_cap_string(file_wanted), + doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s " + "flushing %s issued %s revoking %s retain %s %s%s%s\n", + inode, ceph_vinop(inode), ceph_cap_string(file_wanted), ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps), ceph_cap_string(ci->i_flushing_caps), ceph_cap_string(issued), ceph_cap_string(revoking), @@ -2066,10 +2118,10 @@ retry: (revoking & (CEPH_CAP_FILE_CACHE| CEPH_CAP_FILE_LAZYIO)) && /* or revoking cache */ !tried_invalidate) { - dout("check_caps trying to invalidate on %llx.%llx\n", - ceph_vinop(inode)); + doutc(cl, "trying to invalidate on %p %llx.%llx\n", + inode, ceph_vinop(inode)); if (try_nonblocking_invalidate(inode) < 0) { - dout("check_caps queuing invalidate\n"); + doutc(cl, "queuing invalidate\n"); queue_invalidate = true; ci->i_rdcache_revoking = ci->i_rdcache_gen; } @@ -2097,35 +2149,35 @@ retry: cap_used &= ~ci->i_auth_cap->issued; revoking = cap->implemented & ~cap->issued; - dout(" mds%d cap %p used %s issued %s implemented %s revoking %s\n", - cap->mds, cap, ceph_cap_string(cap_used), - ceph_cap_string(cap->issued), - ceph_cap_string(cap->implemented), - ceph_cap_string(revoking)); + doutc(cl, " mds%d cap %p used %s issued %s implemented %s revoking %s\n", + cap->mds, cap, ceph_cap_string(cap_used), + ceph_cap_string(cap->issued), + ceph_cap_string(cap->implemented), + ceph_cap_string(revoking)); if (cap == ci->i_auth_cap && (cap->issued & CEPH_CAP_FILE_WR)) { /* request larger max_size from MDS? */ if (ci->i_wanted_max_size > ci->i_max_size && ci->i_wanted_max_size > ci->i_requested_max_size) { - dout("requesting new max_size\n"); + doutc(cl, "requesting new max_size\n"); goto ack; } /* approaching file_max? */ if (__ceph_should_report_size(ci)) { - dout("i_size approaching max_size\n"); + doutc(cl, "i_size approaching max_size\n"); goto ack; } } /* flush anything dirty? */ if (cap == ci->i_auth_cap) { if ((flags & CHECK_CAPS_FLUSH) && ci->i_dirty_caps) { - dout("flushing dirty caps\n"); + doutc(cl, "flushing dirty caps\n"); goto ack; } if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS) { - dout("flushing snap caps\n"); + doutc(cl, "flushing snap caps\n"); goto ack; } } @@ -2133,7 +2185,7 @@ retry: /* completed revocation? going down and there are no caps? 
*/ if (revoking) { if ((revoking & cap_used) == 0) { - dout("completed revocation of %s\n", + doutc(cl, "completed revocation of %s\n", ceph_cap_string(cap->implemented & ~cap->issued)); goto ack; } @@ -2232,7 +2284,7 @@ ack: */ static int try_flush_caps(struct inode *inode, u64 *ptid) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); int flushing = 0; u64 flush_tid = 0, oldest_flush_tid = 0; @@ -2310,7 +2362,8 @@ static int caps_are_flushed(struct inode *inode, u64 flush_tid) */ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req1 = NULL, *req2 = NULL; int ret, err = 0; @@ -2400,8 +2453,9 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) kfree(sessions); } - dout("%s %p wait on tid %llu %llu\n", __func__, - inode, req1 ? req1->r_tid : 0ULL, req2 ? req2->r_tid : 0ULL); + doutc(cl, "%p %llx.%llx wait on tid %llu %llu\n", inode, + ceph_vinop(inode), req1 ? req1->r_tid : 0ULL, + req2 ? req2->r_tid : 0ULL); if (req1) { ret = !wait_for_completion_timeout(&req1->r_safe_completion, ceph_timeout_jiffies(req1->r_timeout)); @@ -2427,11 +2481,13 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); u64 flush_tid; int ret, err; int dirty; - dout("fsync %p%s\n", inode, datasync ? " datasync" : ""); + doutc(cl, "%p %llx.%llx%s\n", inode, ceph_vinop(inode), + datasync ? " datasync" : ""); ret = file_write_and_wait_range(file, start, end); if (datasync) @@ -2442,7 +2498,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) goto out; dirty = try_flush_caps(inode, &flush_tid); - dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); + doutc(cl, "dirty caps are %s\n", ceph_cap_string(dirty)); err = flush_mdlog_and_wait_inode_unsafe_requests(inode); @@ -2463,7 +2519,8 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) if (err < 0) ret = err; out: - dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret); + doutc(cl, "%p %llx.%llx%s result=%d\n", inode, ceph_vinop(inode), + datasync ? 
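ceph_check_caps() above derives revoking = cap->implemented & ~cap->issued, the caps the MDS granted earlier and now wants back, and treats the revocation as complete only once none of those bits are still in use. The mask logic in isolation, with arbitrary example values:

    #include <stdio.h>

    int main(void)
    {
            unsigned implemented = 0x5;  /* caps the client may still exercise */
            unsigned issued      = 0x1;  /* caps currently granted by the MDS  */
            unsigned used        = 0x4;  /* caps in active use                 */

            unsigned revoking = implemented & ~issued;   /* here: 0x4 */

            if ((revoking & used) == 0)
                    printf("completed revocation of %x\n", revoking);
            else
                    printf("still using %x, ack deferred\n", revoking & used);
            return 0;
    }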
" datasync" : "", ret); return ret; } @@ -2476,12 +2533,13 @@ out: int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); u64 flush_tid; int err = 0; int dirty; int wait = (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync); - dout("write_inode %p wait=%d\n", inode, wait); + doutc(cl, "%p %llx.%llx wait=%d\n", inode, ceph_vinop(inode), wait); ceph_fscache_unpin_writeback(inode, wbc); if (wait) { err = ceph_wait_on_async_create(inode); @@ -2493,7 +2551,7 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) caps_are_flushed(inode, flush_tid)); } else { struct ceph_mds_client *mdsc = - ceph_sb_to_client(inode->i_sb)->mdsc; + ceph_sb_to_fs_client(inode->i_sb)->mdsc; spin_lock(&ci->i_ceph_lock); if (__ceph_caps_dirty(ci)) @@ -2511,6 +2569,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, __acquires(ci->i_ceph_lock) { struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap *cap; struct ceph_cap_flush *cf; int ret; @@ -2536,8 +2595,8 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, cap = ci->i_auth_cap; if (!(cap && cap->session == session)) { - pr_err("%p auth cap %p not mds%d ???\n", - inode, cap, session->s_mds); + pr_err_client(cl, "%p auth cap %p not mds%d ???\n", + inode, cap, session->s_mds); break; } @@ -2546,8 +2605,9 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, if (!cf->is_capsnap) { struct cap_msg_args arg; - dout("kick_flushing_caps %p cap %p tid %llu %s\n", - inode, cap, cf->tid, ceph_cap_string(cf->caps)); + doutc(cl, "%p %llx.%llx cap %p tid %llu %s\n", + inode, ceph_vinop(inode), cap, cf->tid, + ceph_cap_string(cf->caps)); __prep_cap(&arg, cap, CEPH_CAP_OP_FLUSH, (cf->tid < last_snap_flush ? 
CEPH_CLIENT_CAPS_PENDING_CAPSNAP : 0), @@ -2561,9 +2621,9 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_cap_snap *capsnap = container_of(cf, struct ceph_cap_snap, cap_flush); - dout("kick_flushing_caps %p capsnap %p tid %llu %s\n", - inode, capsnap, cf->tid, - ceph_cap_string(capsnap->dirty)); + doutc(cl, "%p %llx.%llx capsnap %p tid %llu %s\n", + inode, ceph_vinop(inode), capsnap, cf->tid, + ceph_cap_string(capsnap->dirty)); refcount_inc(&capsnap->nref); spin_unlock(&ci->i_ceph_lock); @@ -2571,11 +2631,10 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, ret = __send_flush_snap(inode, session, capsnap, cap->mseq, oldest_flush_tid); if (ret < 0) { - pr_err("kick_flushing_caps: error sending " - "cap flushsnap, ino (%llx.%llx) " - "tid %llu follows %llu\n", - ceph_vinop(inode), cf->tid, - capsnap->follows); + pr_err_client(cl, "error sending cap flushsnap," + " %p %llx.%llx tid %llu follows %llu\n", + inode, ceph_vinop(inode), cf->tid, + capsnap->follows); } ceph_put_cap_snap(capsnap); @@ -2588,22 +2647,26 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci; struct ceph_cap *cap; u64 oldest_flush_tid; - dout("early_kick_flushing_caps mds%d\n", session->s_mds); + doutc(cl, "mds%d\n", session->s_mds); spin_lock(&mdsc->cap_dirty_lock); oldest_flush_tid = __get_oldest_flush_tid(mdsc); spin_unlock(&mdsc->cap_dirty_lock); list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) { + struct inode *inode = &ci->netfs.inode; + spin_lock(&ci->i_ceph_lock); cap = ci->i_auth_cap; if (!(cap && cap->session == session)) { - pr_err("%p auth cap %p not mds%d ???\n", - &ci->netfs.inode, cap, session->s_mds); + pr_err_client(cl, "%p %llx.%llx auth cap %p not mds%d ???\n", + inode, ceph_vinop(inode), cap, + session->s_mds); spin_unlock(&ci->i_ceph_lock); continue; } @@ -2636,24 +2699,28 @@ void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci; struct ceph_cap *cap; u64 oldest_flush_tid; lockdep_assert_held(&session->s_mutex); - dout("kick_flushing_caps mds%d\n", session->s_mds); + doutc(cl, "mds%d\n", session->s_mds); spin_lock(&mdsc->cap_dirty_lock); oldest_flush_tid = __get_oldest_flush_tid(mdsc); spin_unlock(&mdsc->cap_dirty_lock); list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) { + struct inode *inode = &ci->netfs.inode; + spin_lock(&ci->i_ceph_lock); cap = ci->i_auth_cap; if (!(cap && cap->session == session)) { - pr_err("%p auth cap %p not mds%d ???\n", - &ci->netfs.inode, cap, session->s_mds); + pr_err_client(cl, "%p %llx.%llx auth cap %p not mds%d ???\n", + inode, ceph_vinop(inode), cap, + session->s_mds); spin_unlock(&ci->i_ceph_lock); continue; } @@ -2670,11 +2737,13 @@ void ceph_kick_flushing_inode_caps(struct ceph_mds_session *session, { struct ceph_mds_client *mdsc = session->s_mdsc; struct ceph_cap *cap = ci->i_auth_cap; + struct inode *inode = &ci->netfs.inode; lockdep_assert_held(&ci->i_ceph_lock); - dout("%s %p flushing %s\n", __func__, &ci->netfs.inode, - ceph_cap_string(ci->i_flushing_caps)); + doutc(mdsc->fsc->client, "%p %llx.%llx flushing %s\n", + inode, ceph_vinop(inode), + ceph_cap_string(ci->i_flushing_caps)); if (!list_empty(&ci->i_cap_flush_list)) 
{ u64 oldest_flush_tid; @@ -2696,6 +2765,9 @@ void ceph_kick_flushing_inode_caps(struct ceph_mds_session *session, void ceph_take_cap_refs(struct ceph_inode_info *ci, int got, bool snap_rwsem_locked) { + struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); + lockdep_assert_held(&ci->i_ceph_lock); if (got & CEPH_CAP_PIN) @@ -2716,10 +2788,10 @@ void ceph_take_cap_refs(struct ceph_inode_info *ci, int got, } if (got & CEPH_CAP_FILE_BUFFER) { if (ci->i_wb_ref == 0) - ihold(&ci->netfs.inode); + ihold(inode); ci->i_wb_ref++; - dout("%s %p wb %d -> %d (?)\n", __func__, - &ci->netfs.inode, ci->i_wb_ref-1, ci->i_wb_ref); + doutc(cl, "%p %llx.%llx wb %d -> %d (?)\n", inode, + ceph_vinop(inode), ci->i_wb_ref-1, ci->i_wb_ref); } } @@ -2746,20 +2818,23 @@ static int try_get_cap_refs(struct inode *inode, int need, int want, loff_t endoff, int flags, int *got) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; + struct ceph_client *cl = ceph_inode_to_client(inode); int ret = 0; int have, implemented; bool snap_rwsem_locked = false; - dout("get_cap_refs %p need %s want %s\n", inode, - ceph_cap_string(need), ceph_cap_string(want)); + doutc(cl, "%p %llx.%llx need %s want %s\n", inode, + ceph_vinop(inode), ceph_cap_string(need), + ceph_cap_string(want)); again: spin_lock(&ci->i_ceph_lock); if ((flags & CHECK_FILELOCK) && (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK)) { - dout("try_get_cap_refs %p error filelock\n", inode); + doutc(cl, "%p %llx.%llx error filelock\n", inode, + ceph_vinop(inode)); ret = -EIO; goto out_unlock; } @@ -2779,8 +2854,8 @@ again: if (have & need & CEPH_CAP_FILE_WR) { if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) { - dout("get_cap_refs %p endoff %llu > maxsize %llu\n", - inode, endoff, ci->i_max_size); + doutc(cl, "%p %llx.%llx endoff %llu > maxsize %llu\n", + inode, ceph_vinop(inode), endoff, ci->i_max_size); if (endoff > ci->i_requested_max_size) ret = ci->i_auth_cap ? -EFBIG : -EUCLEAN; goto out_unlock; @@ -2790,7 +2865,8 @@ again: * can get a final snapshot value for size+mtime. 
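ceph_take_cap_refs() above prints the writeback refcount as a transition, "wb %d -> %d", using (i_wb_ref-1, i_wb_ref) after the increment, and pins the inode (ihold()) only on the 0 -> 1 edge. A single-threaded sketch of that pattern; the kernel does all of this under i_ceph_lock:

    #include <stdio.h>

    struct inode_state {
            int wb_ref;   /* stands in for ci->i_wb_ref    */
            int pinned;   /* stands in for the ihold() ref */
    };

    static void take_wb_ref(struct inode_state *s)
    {
            if (s->wb_ref == 0)
                    s->pinned++;            /* ihold() on the first taker */
            s->wb_ref++;
            printf("wb %d -> %d\n", s->wb_ref - 1, s->wb_ref);
    }

    int main(void)
    {
            struct inode_state s = { 0, 0 };

            take_wb_ref(&s);   /* wb 0 -> 1, inode pinned */
            take_wb_ref(&s);   /* wb 1 -> 2               */
            return 0;
    }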
*/ if (__ceph_have_pending_cap_snap(ci)) { - dout("get_cap_refs %p cap_snap_pending\n", inode); + doutc(cl, "%p %llx.%llx cap_snap_pending\n", inode, + ceph_vinop(inode)); goto out_unlock; } } @@ -2808,9 +2884,9 @@ again: int not = want & ~(have & need); int revoking = implemented & ~have; int exclude = revoking & not; - dout("get_cap_refs %p have %s but not %s (revoking %s)\n", - inode, ceph_cap_string(have), ceph_cap_string(not), - ceph_cap_string(revoking)); + doutc(cl, "%p %llx.%llx have %s but not %s (revoking %s)\n", + inode, ceph_vinop(inode), ceph_cap_string(have), + ceph_cap_string(not), ceph_cap_string(revoking)); if (!exclude || !(exclude & CEPH_CAP_FILE_BUFFER)) { if (!snap_rwsem_locked && !ci->i_head_snapc && @@ -2850,28 +2926,31 @@ again: spin_unlock(&s->s_cap_lock); } if (session_readonly) { - dout("get_cap_refs %p need %s but mds%d readonly\n", - inode, ceph_cap_string(need), ci->i_auth_cap->mds); + doutc(cl, "%p %llx.%llx need %s but mds%d readonly\n", + inode, ceph_vinop(inode), ceph_cap_string(need), + ci->i_auth_cap->mds); ret = -EROFS; goto out_unlock; } if (ceph_inode_is_shutdown(inode)) { - dout("get_cap_refs %p inode is shutdown\n", inode); + doutc(cl, "%p %llx.%llx inode is shutdown\n", + inode, ceph_vinop(inode)); ret = -ESTALE; goto out_unlock; } mds_wanted = __ceph_caps_mds_wanted(ci, false); if (need & ~mds_wanted) { - dout("get_cap_refs %p need %s > mds_wanted %s\n", - inode, ceph_cap_string(need), - ceph_cap_string(mds_wanted)); + doutc(cl, "%p %llx.%llx need %s > mds_wanted %s\n", + inode, ceph_vinop(inode), ceph_cap_string(need), + ceph_cap_string(mds_wanted)); ret = -EUCLEAN; goto out_unlock; } - dout("get_cap_refs %p have %s need %s\n", inode, - ceph_cap_string(have), ceph_cap_string(need)); + doutc(cl, "%p %llx.%llx have %s need %s\n", inode, + ceph_vinop(inode), ceph_cap_string(have), + ceph_cap_string(need)); } out_unlock: @@ -2886,8 +2965,8 @@ out_unlock: else if (ret == 1) ceph_update_cap_hit(&mdsc->metric); - dout("get_cap_refs %p ret %d got %s\n", inode, - ret, ceph_cap_string(*got)); + doutc(cl, "%p %llx.%llx ret %d got %s\n", inode, + ceph_vinop(inode), ret, ceph_cap_string(*got)); return ret; } @@ -2899,13 +2978,14 @@ out_unlock: static void check_max_size(struct inode *inode, loff_t endoff) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); int check = 0; /* do we need to explicitly request a larger max_size? 
*/ spin_lock(&ci->i_ceph_lock); if (endoff >= ci->i_max_size && endoff > ci->i_wanted_max_size) { - dout("write %p at large endoff %llu, req max_size\n", - inode, endoff); + doutc(cl, "write %p %llx.%llx at large endoff %llu, req max_size\n", + inode, ceph_vinop(inode), endoff); ci->i_wanted_max_size = endoff; } /* duplicate ceph_check_caps()'s logic */ @@ -2964,7 +3044,7 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need, int want, loff_t endoff, int *got) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); int ret, _got, flags; ret = ceph_pool_perm_check(inode, need); @@ -3115,10 +3195,12 @@ void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps) static int ceph_try_drop_cap_snap(struct ceph_inode_info *ci, struct ceph_cap_snap *capsnap) { + struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); + if (!capsnap->need_flush && !capsnap->writing && !capsnap->dirty_pages) { - dout("dropping cap_snap %p follows %llu\n", - capsnap, capsnap->follows); + doutc(cl, "%p follows %llu\n", capsnap, capsnap->follows); BUG_ON(capsnap->cap_flush.tid > 0); ceph_put_snap_context(capsnap->context); if (!list_is_last(&capsnap->ci_item, &ci->i_cap_snaps)) @@ -3150,6 +3232,7 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had, enum put_cap_refs_mode mode) { struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); int last = 0, put = 0, flushsnaps = 0, wake = 0; bool check_flushsnaps = false; @@ -3172,8 +3255,8 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had, put++; check_flushsnaps = true; } - dout("put_cap_refs %p wb %d -> %d (?)\n", - inode, ci->i_wb_ref+1, ci->i_wb_ref); + doutc(cl, "%p %llx.%llx wb %d -> %d (?)\n", inode, + ceph_vinop(inode), ci->i_wb_ref+1, ci->i_wb_ref); } if (had & CEPH_CAP_FILE_WR) { if (--ci->i_wr_ref == 0) { @@ -3213,8 +3296,8 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had, } spin_unlock(&ci->i_ceph_lock); - dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had), - last ? " last" : "", put ? " put" : ""); + doutc(cl, "%p %llx.%llx had %s%s%s\n", inode, ceph_vinop(inode), + ceph_cap_string(had), last ? " last" : "", put ? " put" : ""); switch (mode) { case PUT_CAP_REFS_SYNC: @@ -3264,6 +3347,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, struct ceph_snap_context *snapc) { struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_cap_snap *capsnap = NULL, *iter; int put = 0; bool last = false; @@ -3287,11 +3371,10 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, ceph_put_snap_context(ci->i_head_snapc); ci->i_head_snapc = NULL; } - dout("put_wrbuffer_cap_refs on %p head %d/%d -> %d/%d %s\n", - inode, - ci->i_wrbuffer_ref+nr, ci->i_wrbuffer_ref_head+nr, - ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head, - last ? " LAST" : ""); + doutc(cl, "on %p %llx.%llx head %d/%d -> %d/%d %s\n", + inode, ceph_vinop(inode), ci->i_wrbuffer_ref+nr, + ci->i_wrbuffer_ref_head+nr, ci->i_wrbuffer_ref, + ci->i_wrbuffer_ref_head, last ? 
" LAST" : ""); } else { list_for_each_entry(iter, &ci->i_cap_snaps, ci_item) { if (iter->context == snapc) { @@ -3321,13 +3404,12 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, } } } - dout("put_wrbuffer_cap_refs on %p cap_snap %p " - " snap %lld %d/%d -> %d/%d %s%s\n", - inode, capsnap, capsnap->context->seq, - ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr, - ci->i_wrbuffer_ref, capsnap->dirty_pages, - last ? " (wrbuffer last)" : "", - complete_capsnap ? " (complete capsnap)" : ""); + doutc(cl, "%p %llx.%llx cap_snap %p snap %lld %d/%d -> %d/%d %s%s\n", + inode, ceph_vinop(inode), capsnap, capsnap->context->seq, + ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr, + ci->i_wrbuffer_ref, capsnap->dirty_pages, + last ? " (wrbuffer last)" : "", + complete_capsnap ? " (complete capsnap)" : ""); } unlock: @@ -3350,9 +3432,10 @@ unlock: */ static void invalidate_aliases(struct inode *inode) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct dentry *dn, *prev = NULL; - dout("invalidate_aliases inode %p\n", inode); + doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode)); d_prune_aliases(inode); /* * For non-directory inode, d_find_alias() only returns @@ -3411,6 +3494,7 @@ static void handle_cap_grant(struct inode *inode, __releases(ci->i_ceph_lock) __releases(session->s_mdsc->snap_rwsem) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); int seq = le32_to_cpu(grant->seq); int newcaps = le32_to_cpu(grant->caps); @@ -3434,10 +3518,11 @@ static void handle_cap_grant(struct inode *inode, if (IS_ENCRYPTED(inode) && size) size = extra_info->fscrypt_file_size; - dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", - inode, cap, session->s_mds, seq, ceph_cap_string(newcaps)); - dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, - i_size_read(inode)); + doutc(cl, "%p %llx.%llx cap %p mds%d seq %d %s\n", inode, + ceph_vinop(inode), cap, session->s_mds, seq, + ceph_cap_string(newcaps)); + doutc(cl, " size %llu max_size %llu, i_size %llu\n", size, + max_size, i_size_read(inode)); /* @@ -3497,15 +3582,17 @@ static void handle_cap_grant(struct inode *inode, inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid)); inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid)); ci->i_btime = extra_info->btime; - dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode, - from_kuid(&init_user_ns, inode->i_uid), - from_kgid(&init_user_ns, inode->i_gid)); + doutc(cl, "%p %llx.%llx mode 0%o uid.gid %d.%d\n", inode, + ceph_vinop(inode), inode->i_mode, + from_kuid(&init_user_ns, inode->i_uid), + from_kgid(&init_user_ns, inode->i_gid)); #if IS_ENABLED(CONFIG_FS_ENCRYPTION) if (ci->fscrypt_auth_len != extra_info->fscrypt_auth_len || memcmp(ci->fscrypt_auth, extra_info->fscrypt_auth, ci->fscrypt_auth_len)) - pr_warn_ratelimited("%s: cap grant attempt to change fscrypt_auth on non-I_NEW inode (old len %d new len %d)\n", - __func__, ci->fscrypt_auth_len, + pr_warn_ratelimited_client(cl, + "cap grant attempt to change fscrypt_auth on non-I_NEW inode (old len %d new len %d)\n", + ci->fscrypt_auth_len, extra_info->fscrypt_auth_len); #endif } @@ -3523,8 +3610,8 @@ static void handle_cap_grant(struct inode *inode, u64 version = le64_to_cpu(grant->xattr_version); if (version > ci->i_xattrs.version) { - dout(" got new xattrs v%llu on %p len %d\n", - version, inode, len); + doutc(cl, " got new xattrs v%llu on %p %llx.%llx len %d\n", + version, inode, ceph_vinop(inode), len); if (ci->i_xattrs.blob) 
ceph_buffer_put(ci->i_xattrs.blob); ci->i_xattrs.blob = ceph_buffer_get(xattr_buf); @@ -3575,8 +3662,8 @@ static void handle_cap_grant(struct inode *inode, if (ci->i_auth_cap == cap && (newcaps & CEPH_CAP_ANY_FILE_WR)) { if (max_size != ci->i_max_size) { - dout("max_size %lld -> %llu\n", - ci->i_max_size, max_size); + doutc(cl, "max_size %lld -> %llu\n", ci->i_max_size, + max_size); ci->i_max_size = max_size; if (max_size >= ci->i_wanted_max_size) { ci->i_wanted_max_size = 0; /* reset */ @@ -3590,10 +3677,9 @@ static void handle_cap_grant(struct inode *inode, wanted = __ceph_caps_wanted(ci); used = __ceph_caps_used(ci); dirty = __ceph_caps_dirty(ci); - dout(" my wanted = %s, used = %s, dirty %s\n", - ceph_cap_string(wanted), - ceph_cap_string(used), - ceph_cap_string(dirty)); + doutc(cl, " my wanted = %s, used = %s, dirty %s\n", + ceph_cap_string(wanted), ceph_cap_string(used), + ceph_cap_string(dirty)); if ((was_stale || le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) && (wanted & ~(cap->mds_wanted | newcaps))) { @@ -3614,10 +3700,9 @@ static void handle_cap_grant(struct inode *inode, if (cap->issued & ~newcaps) { int revoking = cap->issued & ~newcaps; - dout("revocation: %s -> %s (revoking %s)\n", - ceph_cap_string(cap->issued), - ceph_cap_string(newcaps), - ceph_cap_string(revoking)); + doutc(cl, "revocation: %s -> %s (revoking %s)\n", + ceph_cap_string(cap->issued), ceph_cap_string(newcaps), + ceph_cap_string(revoking)); if (S_ISREG(inode->i_mode) && (revoking & used & CEPH_CAP_FILE_BUFFER)) writeback = true; /* initiate writeback; will delay ack */ @@ -3635,11 +3720,12 @@ static void handle_cap_grant(struct inode *inode, cap->issued = newcaps; cap->implemented |= newcaps; } else if (cap->issued == newcaps) { - dout("caps unchanged: %s -> %s\n", - ceph_cap_string(cap->issued), ceph_cap_string(newcaps)); + doutc(cl, "caps unchanged: %s -> %s\n", + ceph_cap_string(cap->issued), + ceph_cap_string(newcaps)); } else { - dout("grant: %s -> %s\n", ceph_cap_string(cap->issued), - ceph_cap_string(newcaps)); + doutc(cl, "grant: %s -> %s\n", ceph_cap_string(cap->issued), + ceph_cap_string(newcaps)); /* non-auth MDS is revoking the newly grant caps ? 
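handle_cap_grant() above classifies an MDS message by comparing the currently issued mask with the newly granted one: bits present before but absent now mean a revocation, an identical mask means no change, and anything else is a widening grant. The same three-way split, reduced to a sketch:

    #include <stdio.h>

    static void classify(unsigned issued, unsigned newcaps)
    {
            unsigned revoking = issued & ~newcaps;

            if (revoking)
                    printf("revocation: %x -> %x (revoking %x)\n",
                           issued, newcaps, revoking);
            else if (issued == newcaps)
                    printf("caps unchanged: %x -> %x\n", issued, newcaps);
            else
                    printf("grant: %x -> %x\n", issued, newcaps);
    }

    int main(void)
    {
            classify(0x7, 0x3);   /* revocation */
            classify(0x3, 0x3);   /* unchanged  */
            classify(0x3, 0x7);   /* grant      */
            return 0;
    }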
*/ if (cap == ci->i_auth_cap && __ceph_caps_revoking_other(ci, cap, newcaps)) @@ -3727,7 +3813,8 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, __releases(ci->i_ceph_lock) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap_flush *cf, *tmp_cf; LIST_HEAD(to_remove); unsigned seq = le32_to_cpu(m->seq); @@ -3764,11 +3851,11 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, } } - dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s," - " flushing %s -> %s\n", - inode, session->s_mds, seq, ceph_cap_string(dirty), - ceph_cap_string(cleaned), ceph_cap_string(ci->i_flushing_caps), - ceph_cap_string(ci->i_flushing_caps & ~cleaned)); + doutc(cl, "%p %llx.%llx mds%d seq %d on %s cleaned %s, flushing %s -> %s\n", + inode, ceph_vinop(inode), session->s_mds, seq, + ceph_cap_string(dirty), ceph_cap_string(cleaned), + ceph_cap_string(ci->i_flushing_caps), + ceph_cap_string(ci->i_flushing_caps & ~cleaned)); if (list_empty(&to_remove) && !cleaned) goto out; @@ -3784,18 +3871,21 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, if (list_empty(&ci->i_cap_flush_list)) { list_del_init(&ci->i_flushing_item); if (!list_empty(&session->s_cap_flushing)) { - dout(" mds%d still flushing cap on %p\n", - session->s_mds, - &list_first_entry(&session->s_cap_flushing, - struct ceph_inode_info, - i_flushing_item)->netfs.inode); + struct inode *inode = + &list_first_entry(&session->s_cap_flushing, + struct ceph_inode_info, + i_flushing_item)->netfs.inode; + doutc(cl, " mds%d still flushing cap on %p %llx.%llx\n", + session->s_mds, inode, ceph_vinop(inode)); } } mdsc->num_cap_flushing--; - dout(" inode %p now !flushing\n", inode); + doutc(cl, " %p %llx.%llx now !flushing\n", inode, + ceph_vinop(inode)); if (ci->i_dirty_caps == 0) { - dout(" inode %p now clean\n", inode); + doutc(cl, " %p %llx.%llx now clean\n", inode, + ceph_vinop(inode)); BUG_ON(!list_empty(&ci->i_dirty_item)); drop = true; if (ci->i_wr_ref == 0 && @@ -3833,12 +3923,14 @@ void __ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap, bool *wake_ci, bool *wake_mdsc) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; bool ret; lockdep_assert_held(&ci->i_ceph_lock); - dout("removing capsnap %p, inode %p ci %p\n", capsnap, inode, ci); + doutc(cl, "removing capsnap %p, %p %llx.%llx ci %p\n", capsnap, + inode, ceph_vinop(inode), ci); list_del_init(&capsnap->ci_item); ret = __detach_cap_flush_from_ci(ci, &capsnap->cap_flush); @@ -3877,29 +3969,31 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid, struct ceph_mds_session *session) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; u64 follows = le64_to_cpu(m->snap_follows); struct ceph_cap_snap *capsnap = NULL, *iter; bool wake_ci = false; bool wake_mdsc = false; - dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n", - inode, ci, session->s_mds, follows); + doutc(cl, "%p %llx.%llx ci %p mds%d follows 
%lld\n", inode, + ceph_vinop(inode), ci, session->s_mds, follows); spin_lock(&ci->i_ceph_lock); list_for_each_entry(iter, &ci->i_cap_snaps, ci_item) { if (iter->follows == follows) { if (iter->cap_flush.tid != flush_tid) { - dout(" cap_snap %p follows %lld tid %lld !=" - " %lld\n", iter, follows, - flush_tid, iter->cap_flush.tid); + doutc(cl, " cap_snap %p follows %lld " + "tid %lld != %lld\n", iter, + follows, flush_tid, + iter->cap_flush.tid); break; } capsnap = iter; break; } else { - dout(" skipping cap_snap %p follows %lld\n", - iter, iter->follows); + doutc(cl, " skipping cap_snap %p follows %lld\n", + iter, iter->follows); } } if (capsnap) @@ -3928,6 +4022,7 @@ static bool handle_cap_trunc(struct inode *inode, struct cap_extra_info *extra_info) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); int mds = session->s_mds; int seq = le32_to_cpu(trunc->seq); u32 truncate_seq = le32_to_cpu(trunc->truncate_seq); @@ -3950,8 +4045,8 @@ static bool handle_cap_trunc(struct inode *inode, if (IS_ENCRYPTED(inode) && size) size = extra_info->fscrypt_file_size; - dout("%s inode %p mds%d seq %d to %lld truncate seq %d\n", - __func__, inode, mds, seq, truncate_size, truncate_seq); + doutc(cl, "%p %llx.%llx mds%d seq %d to %lld truncate seq %d\n", + inode, ceph_vinop(inode), mds, seq, truncate_size, truncate_seq); queue_trunc = ceph_fill_file_size(inode, issued, truncate_seq, truncate_size, size); return queue_trunc; @@ -3969,7 +4064,8 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, struct ceph_mds_cap_peer *ph, struct ceph_mds_session *session) { - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_session *tsession = NULL; struct ceph_cap *cap, *tcap, *new_cap = NULL; struct ceph_inode_info *ci = ceph_inode(inode); @@ -3989,8 +4085,8 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, target = -1; } - dout("handle_cap_export inode %p ci %p mds%d mseq %d target %d\n", - inode, ci, mds, mseq, target); + doutc(cl, "%p %llx.%llx ci %p mds%d mseq %d target %d\n", + inode, ceph_vinop(inode), ci, mds, mseq, target); retry: down_read(&mdsc->snap_rwsem); spin_lock(&ci->i_ceph_lock); @@ -3999,7 +4095,7 @@ retry: goto out_unlock; if (target < 0) { - ceph_remove_cap(cap, false); + ceph_remove_cap(mdsc, cap, false); goto out_unlock; } @@ -4010,12 +4106,13 @@ retry: issued = cap->issued; if (issued != cap->implemented) - pr_err_ratelimited("handle_cap_export: issued != implemented: " - "ino (%llx.%llx) mds%d seq %d mseq %d " - "issued %s implemented %s\n", - ceph_vinop(inode), mds, cap->seq, cap->mseq, - ceph_cap_string(issued), - ceph_cap_string(cap->implemented)); + pr_err_ratelimited_client(cl, "issued != implemented: " + "%p %llx.%llx mds%d seq %d mseq %d" + " issued %s implemented %s\n", + inode, ceph_vinop(inode), mds, + cap->seq, cap->mseq, + ceph_cap_string(issued), + ceph_cap_string(cap->implemented)); tcap = __get_cap_for_mds(ci, target); @@ -4023,7 +4120,8 @@ retry: /* already have caps from the target */ if (tcap->cap_id == t_cap_id && ceph_seq_cmp(tcap->seq, t_seq) < 0) { - dout(" updating import cap %p mds%d\n", tcap, target); + doutc(cl, " updating import cap %p mds%d\n", tcap, + target); tcap->cap_id = t_cap_id; tcap->seq = t_seq - 1; tcap->issue_seq = t_seq - 1; @@ -4034,7 +4132,7 @@ retry: change_auth_cap_ses(ci, tcap->session); } } - 
ceph_remove_cap(cap, false); + ceph_remove_cap(mdsc, cap, false); goto out_unlock; } else if (tsession) { /* add placeholder for the export target */ @@ -4051,7 +4149,7 @@ retry: spin_unlock(&mdsc->cap_dirty_lock); } - ceph_remove_cap(cap, false); + ceph_remove_cap(mdsc, cap, false); goto out_unlock; } @@ -4104,6 +4202,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, struct ceph_cap **target_cap, int *old_issued) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap *cap, *ocap, *new_cap = NULL; int mds = session->s_mds; int issued; @@ -4124,8 +4223,8 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, peer = -1; } - dout("handle_cap_import inode %p ci %p mds%d mseq %d peer %d\n", - inode, ci, mds, mseq, peer); + doutc(cl, "%p %llx.%llx ci %p mds%d mseq %d peer %d\n", + inode, ceph_vinop(inode), ci, mds, mseq, peer); retry: cap = __get_cap_for_mds(ci, mds); if (!cap) { @@ -4151,20 +4250,20 @@ retry: ocap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL; if (ocap && ocap->cap_id == p_cap_id) { - dout(" remove export cap %p mds%d flags %d\n", - ocap, peer, ph->flags); + doutc(cl, " remove export cap %p mds%d flags %d\n", + ocap, peer, ph->flags); if ((ph->flags & CEPH_CAP_FLAG_AUTH) && (ocap->seq != le32_to_cpu(ph->seq) || ocap->mseq != le32_to_cpu(ph->mseq))) { - pr_err_ratelimited("handle_cap_import: " - "mismatched seq/mseq: ino (%llx.%llx) " - "mds%d seq %d mseq %d importer mds%d " - "has peer seq %d mseq %d\n", - ceph_vinop(inode), peer, ocap->seq, - ocap->mseq, mds, le32_to_cpu(ph->seq), + pr_err_ratelimited_client(cl, "mismatched seq/mseq: " + "%p %llx.%llx mds%d seq %d mseq %d" + " importer mds%d has peer seq %d mseq %d\n", + inode, ceph_vinop(inode), peer, + ocap->seq, ocap->mseq, mds, + le32_to_cpu(ph->seq), le32_to_cpu(ph->mseq)); } - ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); + ceph_remove_cap(mdsc, ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); } *old_issued = issued; @@ -4227,6 +4326,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, struct ceph_msg *msg) { struct ceph_mds_client *mdsc = session->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct inode *inode; struct ceph_inode_info *ci; struct ceph_cap *cap; @@ -4245,7 +4345,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, bool close_sessions = false; bool do_cap_release = false; - dout("handle_caps from mds%d\n", session->s_mds); + doutc(cl, "from mds%d\n", session->s_mds); if (!ceph_inc_mds_stopping_blocker(mdsc, session)) return; @@ -4347,15 +4447,15 @@ void ceph_handle_caps(struct ceph_mds_session, /* lookup ino */ inode = ceph_find_inode(mdsc->fsc->sb, vino); - dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino, - vino.snap, inode); + doutc(cl, " op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), + vino.ino, vino.snap, inode); mutex_lock(&session->s_mutex); - dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, - (unsigned)seq); + doutc(cl, " mds%d seq %lld cap seq %u\n", session->s_mds, + session->s_seq, (unsigned)seq); if (!inode) { - dout(" i don't have ino %llx\n", vino.ino); + doutc(cl, " i don't have ino %llx\n", vino.ino); switch (op) { case CEPH_CAP_OP_IMPORT: @@ -4410,9 +4510,9 @@ void ceph_handle_caps(struct ceph_mds_session, spin_lock(&ci->i_ceph_lock); cap = __get_cap_for_mds(ceph_inode(inode), session->s_mds); if (!cap) { - dout(" no cap on %p ino %llx.%llx from mds%d\n", - inode, ceph_ino(inode), ceph_snap(inode), - 
session->s_mds); + doutc(cl, " no cap on %p ino %llx.%llx from mds%d\n", + inode, ceph_ino(inode), ceph_snap(inode), + session->s_mds); spin_unlock(&ci->i_ceph_lock); switch (op) { case CEPH_CAP_OP_REVOKE: @@ -4450,8 +4550,8 @@ void ceph_handle_caps(struct ceph_mds_session *session, default: spin_unlock(&ci->i_ceph_lock); - pr_err("ceph_handle_caps: unknown cap op %d %s\n", op, - ceph_cap_op_name(op)); + pr_err_client(cl, "unknown cap op %d %s\n", op, + ceph_cap_op_name(op)); } done: @@ -4492,7 +4592,7 @@ flush_cap_releases: goto done; bad: - pr_err("ceph_handle_caps: corrupt message\n"); + pr_err_client(cl, "corrupt message\n"); ceph_msg_dump(msg); goto out; } @@ -4506,6 +4606,7 @@ bad: */ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; struct inode *inode; struct ceph_inode_info *ci; struct ceph_mount_options *opt = mdsc->fsc->mount_options; @@ -4513,14 +4614,14 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc) unsigned long loop_start = jiffies; unsigned long delay = 0; - dout("check_delayed_caps\n"); + doutc(cl, "begin\n"); spin_lock(&mdsc->cap_delay_lock); while (!list_empty(&mdsc->cap_delay_list)) { ci = list_first_entry(&mdsc->cap_delay_list, struct ceph_inode_info, i_cap_delay_list); if (time_before(loop_start, ci->i_hold_caps_max - delay_max)) { - dout("%s caps added recently. Exiting loop", __func__); + doutc(cl, "caps added recently. Exiting loop"); delay = ci->i_hold_caps_max; break; } @@ -4532,13 +4633,15 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc) inode = igrab(&ci->netfs.inode); if (inode) { spin_unlock(&mdsc->cap_delay_lock); - dout("check_delayed_caps on %p\n", inode); + doutc(cl, "on %p %llx.%llx\n", inode, + ceph_vinop(inode)); ceph_check_caps(ci, 0); iput(inode); spin_lock(&mdsc->cap_delay_lock); } } spin_unlock(&mdsc->cap_delay_lock); + doutc(cl, "done\n"); return delay; } @@ -4549,17 +4652,18 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc) static void flush_dirty_session_caps(struct ceph_mds_session *s) { struct ceph_mds_client *mdsc = s->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci; struct inode *inode; - dout("flush_dirty_caps\n"); + doutc(cl, "begin\n"); spin_lock(&mdsc->cap_dirty_lock); while (!list_empty(&s->s_cap_dirty)) { ci = list_first_entry(&s->s_cap_dirty, struct ceph_inode_info, i_dirty_item); inode = &ci->netfs.inode; ihold(inode); - dout("flush_dirty_caps %llx.%llx\n", ceph_vinop(inode)); + doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode)); spin_unlock(&mdsc->cap_dirty_lock); ceph_wait_on_async_create(inode); ceph_check_caps(ci, CHECK_CAPS_FLUSH); @@ -4567,7 +4671,7 @@ static void flush_dirty_session_caps(struct ceph_mds_session *s) spin_lock(&mdsc->cap_dirty_lock); } spin_unlock(&mdsc->cap_dirty_lock); - dout("flush_dirty_caps done\n"); + doutc(cl, "done\n"); } void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) @@ -4672,7 +4776,7 @@ int ceph_drop_caps_for_unlink(struct inode *inode) if (__ceph_caps_dirty(ci)) { struct ceph_mds_client *mdsc = - ceph_inode_to_client(inode)->mdsc; + ceph_inode_to_fs_client(inode)->mdsc; __cap_delay_requeue_front(mdsc, ci); } } @@ -4692,6 +4796,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode, int mds, int drop, int unless, int force) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_cap *cap; struct ceph_mds_request_release *rel = *p; int used, dirty; @@ 
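The default branch above reports an unhandled operation with both its numeric value and its ceph_cap_op_name() rendering, so even unknown codes produce a readable line. A sketch of that lookup-with-fallback; the table entries here are invented placeholders, not the real cap op list:

    #include <stdio.h>

    static const char *const op_names[] = {
            "grant", "revoke", "trunc", "export", "import",  /* illustrative */
    };

    static const char *cap_op_name(unsigned op)
    {
            if (op < sizeof(op_names) / sizeof(op_names[0]))
                    return op_names[op];
            return "???";   /* fallback for codes we do not know */
    }

    int main(void)
    {
            printf("unknown cap op %d %s\n", 42, cap_op_name(42));
            return 0;
    }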
-4701,9 +4806,9 @@ int ceph_encode_inode_release(void **p, struct inode *inode, used = __ceph_caps_used(ci); dirty = __ceph_caps_dirty(ci); - dout("encode_inode_release %p mds%d used|dirty %s drop %s unless %s\n", - inode, mds, ceph_cap_string(used|dirty), ceph_cap_string(drop), - ceph_cap_string(unless)); + doutc(cl, "%p %llx.%llx mds%d used|dirty %s drop %s unless %s\n", + inode, ceph_vinop(inode), mds, ceph_cap_string(used|dirty), + ceph_cap_string(drop), ceph_cap_string(unless)); /* only drop unused, clean caps */ drop &= ~(used | dirty); @@ -4725,12 +4830,13 @@ int ceph_encode_inode_release(void **p, struct inode *inode, if (force || (cap->issued & drop)) { if (cap->issued & drop) { int wanted = __ceph_caps_wanted(ci); - dout("encode_inode_release %p cap %p " - "%s -> %s, wanted %s -> %s\n", inode, cap, - ceph_cap_string(cap->issued), - ceph_cap_string(cap->issued & ~drop), - ceph_cap_string(cap->mds_wanted), - ceph_cap_string(wanted)); + doutc(cl, "%p %llx.%llx cap %p %s -> %s, " + "wanted %s -> %s\n", inode, + ceph_vinop(inode), cap, + ceph_cap_string(cap->issued), + ceph_cap_string(cap->issued & ~drop), + ceph_cap_string(cap->mds_wanted), + ceph_cap_string(wanted)); cap->issued &= ~drop; cap->implemented &= ~drop; @@ -4739,9 +4845,9 @@ int ceph_encode_inode_release(void **p, struct inode *inode, !(wanted & CEPH_CAP_ANY_FILE_WR)) ci->i_requested_max_size = 0; } else { - dout("encode_inode_release %p cap %p %s" - " (force)\n", inode, cap, - ceph_cap_string(cap->issued)); + doutc(cl, "%p %llx.%llx cap %p %s (force)\n", + inode, ceph_vinop(inode), cap, + ceph_cap_string(cap->issued)); } rel->ino = cpu_to_le64(ceph_ino(inode)); @@ -4756,8 +4862,9 @@ int ceph_encode_inode_release(void **p, struct inode *inode, *p += sizeof(*rel); ret = 1; } else { - dout("encode_inode_release %p cap %p %s (noop)\n", - inode, cap, ceph_cap_string(cap->issued)); + doutc(cl, "%p %llx.%llx cap %p %s (noop)\n", + inode, ceph_vinop(inode), cap, + ceph_cap_string(cap->issued)); } } spin_unlock(&ci->i_ceph_lock); @@ -4783,6 +4890,7 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry, struct dentry *parent = NULL; struct ceph_mds_request_release *rel = *p; struct ceph_dentry_info *di = ceph_dentry(dentry); + struct ceph_client *cl; int force = 0; int ret; @@ -4804,10 +4912,11 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry, ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force); dput(parent); + cl = ceph_inode_to_client(dir); spin_lock(&dentry->d_lock); if (ret && di->lease_session && di->lease_session->s_mds == mds) { - dout("encode_dentry_release %p mds%d seq %d\n", - dentry, mds, (int)di->lease_seq); + doutc(cl, "%p mds%d seq %d\n", dentry, mds, + (int)di->lease_seq); rel->dname_seq = cpu_to_le32(di->lease_seq); __ceph_mdsc_drop_dentry_lease(dentry); spin_unlock(&dentry->d_lock); @@ -4833,12 +4942,14 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry, static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap_snap *capsnap; int capsnap_release = 0; lockdep_assert_held(&ci->i_ceph_lock); - dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode); + doutc(cl, "removing capsnaps, ci is %p, %p %llx.%llx\n", + ci, inode, ceph_vinop(inode)); while (!list_empty(&ci->i_cap_snaps)) { capsnap = list_first_entry(&ci->i_cap_snaps, @@ -4855,8 +4966,9 @@ static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode) 
int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invalidate) { - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *cl = fsc->client; struct ceph_inode_info *ci = ceph_inode(inode); bool is_auth; bool dirty_dropped = false; @@ -4864,8 +4976,8 @@ int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invali lockdep_assert_held(&ci->i_ceph_lock); - dout("removing cap %p, ci is %p, inode is %p\n", - cap, ci, &ci->netfs.inode); + doutc(cl, "removing cap %p, ci is %p, %p %llx.%llx\n", + cap, ci, inode, ceph_vinop(inode)); is_auth = (cap == ci->i_auth_cap); __ceph_remove_cap(cap, false); @@ -4892,19 +5004,19 @@ int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invali } if (!list_empty(&ci->i_dirty_item)) { - pr_warn_ratelimited( - " dropping dirty %s state for %p %lld\n", + pr_warn_ratelimited_client(cl, + " dropping dirty %s state for %p %llx.%llx\n", ceph_cap_string(ci->i_dirty_caps), - inode, ceph_ino(inode)); + inode, ceph_vinop(inode)); ci->i_dirty_caps = 0; list_del_init(&ci->i_dirty_item); dirty_dropped = true; } if (!list_empty(&ci->i_flushing_item)) { - pr_warn_ratelimited( - " dropping dirty+flushing %s state for %p %lld\n", + pr_warn_ratelimited_client(cl, + " dropping dirty+flushing %s state for %p %llx.%llx\n", ceph_cap_string(ci->i_flushing_caps), - inode, ceph_ino(inode)); + inode, ceph_vinop(inode)); ci->i_flushing_caps = 0; list_del_init(&ci->i_flushing_item); mdsc->num_cap_flushing--; @@ -4927,8 +5039,9 @@ int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invali if (atomic_read(&ci->i_filelock_ref) > 0) { /* make further file lock syscall return -EIO */ ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK; - pr_warn_ratelimited(" dropping file locks for %p %lld\n", - inode, ceph_ino(inode)); + pr_warn_ratelimited_client(cl, + " dropping file locks for %p %llx.%llx\n", + inode, ceph_vinop(inode)); } if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) { diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c index e3b1c3fab412..3b3c4d8d401e 100644 --- a/fs/ceph/crypto.c +++ b/fs/ceph/crypto.c @@ -113,7 +113,7 @@ static int ceph_crypt_set_context(struct inode *inode, const void *ctx, cia.fscrypt_auth = cfa; - ret = __ceph_setattr(inode, &attr, &cia); + ret = __ceph_setattr(&nop_mnt_idmap, inode, &attr, &cia); if (ret == 0) inode_set_flags(inode, S_ENCRYPTED, S_ENCRYPTED); kfree(cia.fscrypt_auth); @@ -129,7 +129,7 @@ static bool ceph_crypt_empty_dir(struct inode *inode) static const union fscrypt_policy *ceph_get_dummy_policy(struct super_block *sb) { - return ceph_sb_to_client(sb)->fsc_dummy_enc_policy.policy; + return ceph_sb_to_fs_client(sb)->fsc_dummy_enc_policy.policy; } static struct fscrypt_operations ceph_fscrypt_ops = { @@ -212,6 +212,7 @@ void ceph_fscrypt_as_ctx_to_req(struct ceph_mds_request *req, static struct inode *parse_longname(const struct inode *parent, const char *name, int *name_len) { + struct ceph_client *cl = ceph_inode_to_client(parent); struct inode *dir = NULL; struct ceph_vino vino = { .snap = CEPH_NOSNAP }; char *inode_number; @@ -223,12 +224,12 @@ static struct inode *parse_longname(const struct inode *parent, name++; name_end = strrchr(name, '_'); if (!name_end) { - dout("Failed to parse long snapshot name: %s\n", name); + doutc(cl, "failed to parse long snapshot name: %s\n", name); return ERR_PTR(-EIO); } *name_len = (name_end - name); if 
(*name_len <= 0) { - pr_err("Failed to parse long snapshot name\n"); + pr_err_client(cl, "failed to parse long snapshot name\n"); return ERR_PTR(-EIO); } @@ -240,7 +241,7 @@ static struct inode *parse_longname(const struct inode *parent, return ERR_PTR(-ENOMEM); ret = kstrtou64(inode_number, 10, &vino.ino); if (ret) { - dout("Failed to parse inode number: %s\n", name); + doutc(cl, "failed to parse inode number: %s\n", name); dir = ERR_PTR(ret); goto out; } @@ -251,7 +252,7 @@ static struct inode *parse_longname(const struct inode *parent, /* This can happen if we're not mounting cephfs on the root */ dir = ceph_get_inode(parent->i_sb, vino, NULL); if (IS_ERR(dir)) - dout("Can't find inode %s (%s)\n", inode_number, name); + doutc(cl, "can't find inode %s (%s)\n", inode_number, name); } out: @@ -262,6 +263,7 @@ out: int ceph_encode_encrypted_dname(struct inode *parent, struct qstr *d_name, char *buf) { + struct ceph_client *cl = ceph_inode_to_client(parent); struct inode *dir = parent; struct qstr iname; u32 len; @@ -330,7 +332,7 @@ int ceph_encode_encrypted_dname(struct inode *parent, struct qstr *d_name, /* base64 encode the encrypted name */ elen = ceph_base64_encode(cryptbuf, len, buf); - dout("base64-encoded ciphertext name = %.*s\n", elen, buf); + doutc(cl, "base64-encoded ciphertext name = %.*s\n", elen, buf); /* To understand the 240 limit, see CEPH_NOHASH_NAME_MAX comments */ WARN_ON(elen > 240); @@ -505,7 +507,10 @@ int ceph_fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num) { - dout("%s: len %u offs %u blk %llu\n", __func__, len, offs, lblk_num); + struct ceph_client *cl = ceph_inode_to_client(inode); + + doutc(cl, "%p %llx.%llx len %u offs %u blk %llu\n", inode, + ceph_vinop(inode), len, offs, lblk_num); return fscrypt_decrypt_block_inplace(inode, page, len, offs, lblk_num); } @@ -514,7 +519,10 @@ int ceph_fscrypt_encrypt_block_inplace(const struct inode *inode, unsigned int offs, u64 lblk_num, gfp_t gfp_flags) { - dout("%s: len %u offs %u blk %llu\n", __func__, len, offs, lblk_num); + struct ceph_client *cl = ceph_inode_to_client(inode); + + doutc(cl, "%p %llx.%llx len %u offs %u blk %llu\n", inode, + ceph_vinop(inode), len, offs, lblk_num); return fscrypt_encrypt_block_inplace(inode, page, len, offs, lblk_num, gfp_flags); } @@ -583,6 +591,7 @@ int ceph_fscrypt_decrypt_extents(struct inode *inode, struct page **page, u64 off, struct ceph_sparse_extent *map, u32 ext_cnt) { + struct ceph_client *cl = ceph_inode_to_client(inode); int i, ret = 0; struct ceph_inode_info *ci = ceph_inode(inode); u64 objno, objoff; @@ -590,7 +599,8 @@ int ceph_fscrypt_decrypt_extents(struct inode *inode, struct page **page, /* Nothing to do for empty array */ if (ext_cnt == 0) { - dout("%s: empty array, ret 0\n", __func__); + doutc(cl, "%p %llx.%llx empty array, ret 0\n", inode, + ceph_vinop(inode)); return 0; } @@ -604,14 +614,17 @@ int ceph_fscrypt_decrypt_extents(struct inode *inode, struct page **page, int fret; if ((ext->off | ext->len) & ~CEPH_FSCRYPT_BLOCK_MASK) { - pr_warn("%s: bad encrypted sparse extent idx %d off %llx len %llx\n", - __func__, i, ext->off, ext->len); + pr_warn_client(cl, + "%p %llx.%llx bad encrypted sparse extent " + "idx %d off %llx len %llx\n", + inode, ceph_vinop(inode), i, ext->off, + ext->len); return -EIO; } fret = ceph_fscrypt_decrypt_pages(inode, &page[pgidx], off + pgsoff, ext->len); - dout("%s: [%d] 0x%llx~0x%llx fret %d\n", __func__, i, - ext->off, ext->len, fret); + doutc(cl, "%p 
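parse_longname() above splits encrypted long snapshot names of the form _<name>_<inode-number>: skip the leading underscore, treat everything up to the last underscore as the name, and parse the decimal tail as the inode number (the kernel uses kstrtou64 with base 10). A userspace rendering of that split, with error handling reduced to return codes:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static int parse_longname(const char *s, char *name, size_t cap,
                              unsigned long long *ino)
    {
            const char *start, *name_end;
            size_t len;

            if (s[0] != '_')
                    return -1;
            start = s + 1;                    /* skip the leading '_'    */
            name_end = strrchr(start, '_');   /* last '_' starts the ino */
            if (!name_end || name_end == start)
                    return -1;                /* empty name: reject      */

            len = (size_t)(name_end - start);
            if (len + 1 > cap)
                    return -1;
            memcpy(name, start, len);
            name[len] = '\0';

            *ino = strtoull(name_end + 1, NULL, 10);
            return 0;
    }

    int main(void)
    {
            char name[64];
            unsigned long long ino;

            if (!parse_longname("_mysnap_1099511627776", name, sizeof(name), &ino))
                    printf("name '%s' ino %llu\n", name, ino);
            return 0;
    }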
%llx.%llx [%d] 0x%llx~0x%llx fret %d\n", inode, + ceph_vinop(inode), i, ext->off, ext->len, fret); if (fret < 0) { if (ret == 0) ret = fret; @@ -619,7 +632,7 @@ int ceph_fscrypt_decrypt_extents(struct inode *inode, struct page **page, } ret = pgsoff + fret; } - dout("%s: ret %d\n", __func__, ret); + doutc(cl, "ret %d\n", ret); return ret; } diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 3904333fa6c3..24c08078f5aa 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -81,7 +81,7 @@ static int mdsc_show(struct seq_file *s, void *p) if (req->r_inode) { seq_printf(s, " #%llx", ceph_ino(req->r_inode)); } else if (req->r_dentry) { - path = ceph_mdsc_build_path(req->r_dentry, &pathlen, + path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen, &pathbase, 0); if (IS_ERR(path)) path = NULL; @@ -100,7 +100,7 @@ static int mdsc_show(struct seq_file *s, void *p) } if (req->r_old_dentry) { - path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen, + path = ceph_mdsc_build_path(mdsc, req->r_old_dentry, &pathlen, &pathbase, 0); if (IS_ERR(path)) path = NULL; @@ -398,7 +398,7 @@ DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get, void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) { - dout("ceph_fs_debugfs_cleanup\n"); + doutc(fsc->client, "begin\n"); debugfs_remove(fsc->debugfs_bdi); debugfs_remove(fsc->debugfs_congestion_kb); debugfs_remove(fsc->debugfs_mdsmap); @@ -407,13 +407,14 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) debugfs_remove(fsc->debugfs_status); debugfs_remove(fsc->debugfs_mdsc); debugfs_remove_recursive(fsc->debugfs_metrics_dir); + doutc(fsc->client, "done\n"); } void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) { char name[100]; - dout("ceph_fs_debugfs_init\n"); + doutc(fsc->client, "begin\n"); fsc->debugfs_congestion_kb = debugfs_create_file("writeback_congestion_kb", 0600, @@ -469,6 +470,7 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) &metrics_size_fops); debugfs_create_file("caps", 0400, fsc->debugfs_metrics_dir, fsc, &metrics_caps_fops); + doutc(fsc->client, "done\n"); } diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 854cbdd66661..91709934c8b1 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -109,7 +109,9 @@ static int fpos_cmp(loff_t l, loff_t r) * regardless of what dir changes take place on the * server. 
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 3904333fa6c3..24c08078f5aa 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -81,7 +81,7 @@ static int mdsc_show(struct seq_file *s, void *p) if (req->r_inode) { seq_printf(s, " #%llx", ceph_ino(req->r_inode)); } else if (req->r_dentry) { - path = ceph_mdsc_build_path(req->r_dentry, &pathlen, + path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen, &pathbase, 0); if (IS_ERR(path)) path = NULL; @@ -100,7 +100,7 @@ static int mdsc_show(struct seq_file *s, void *p) } if (req->r_old_dentry) { - path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen, + path = ceph_mdsc_build_path(mdsc, req->r_old_dentry, &pathlen, &pathbase, 0); if (IS_ERR(path)) path = NULL; @@ -398,7 +398,7 @@ DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get, void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) { - dout("ceph_fs_debugfs_cleanup\n"); + doutc(fsc->client, "begin\n"); debugfs_remove(fsc->debugfs_bdi); debugfs_remove(fsc->debugfs_congestion_kb); debugfs_remove(fsc->debugfs_mdsmap); @@ -407,13 +407,14 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) debugfs_remove(fsc->debugfs_status); debugfs_remove(fsc->debugfs_mdsc); debugfs_remove_recursive(fsc->debugfs_metrics_dir); + doutc(fsc->client, "done\n"); } void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) { char name[100]; - dout("ceph_fs_debugfs_init\n"); + doutc(fsc->client, "begin\n"); fsc->debugfs_congestion_kb = debugfs_create_file("writeback_congestion_kb", 0600, @@ -469,6 +470,7 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) &metrics_size_fops); debugfs_create_file("caps", 0400, fsc->debugfs_metrics_dir, fsc, &metrics_caps_fops); + doutc(fsc->client, "done\n"); } diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 854cbdd66661..91709934c8b1 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -109,7 +109,9 @@ static int fpos_cmp(loff_t l, loff_t r) * regardless of what dir changes take place on the * server. */ -static int note_last_dentry(struct ceph_dir_file_info *dfi, const char *name, +static int note_last_dentry(struct ceph_fs_client *fsc, + struct ceph_dir_file_info *dfi, + const char *name, int len, unsigned next_offset) { char *buf = kmalloc(len+1, GFP_KERNEL); @@ -120,7 +122,7 @@ static int note_last_dentry(struct ceph_dir_file_info *dfi, const char *name, memcpy(dfi->last_name, name, len); dfi->last_name[len] = 0; dfi->next_offset = next_offset; - dout("note_last_dentry '%s'\n", dfi->last_name); + doutc(fsc->client, "'%s'\n", dfi->last_name); return 0; } @@ -130,6 +132,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx, struct ceph_readdir_cache_control *cache_ctl) { struct inode *dir = d_inode(parent); + struct ceph_client *cl = ceph_inode_to_client(dir); struct dentry *dentry; unsigned idx_mask = (PAGE_SIZE / sizeof(struct dentry *)) - 1; loff_t ptr_pos = idx * sizeof(struct dentry *); @@ -142,7 +145,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx, ceph_readdir_cache_release(cache_ctl); cache_ctl->page = find_lock_page(&dir->i_data, ptr_pgoff); if (!cache_ctl->page) { - dout(" page %lu not found\n", ptr_pgoff); + doutc(cl, " page %lu not found\n", ptr_pgoff); return ERR_PTR(-EAGAIN); } /* reading/filling the cache are serialized by @@ -185,13 +188,16 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, struct ceph_dir_file_info *dfi = file->private_data; struct dentry *parent = file->f_path.dentry; struct inode *dir = d_inode(parent); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(dir); + struct ceph_client *cl = ceph_inode_to_client(dir); struct dentry *dentry, *last = NULL; struct ceph_dentry_info *di; struct ceph_readdir_cache_control cache_ctl = {}; u64 idx = 0; int err = 0; - dout("__dcache_readdir %p v%u at %llx\n", dir, (unsigned)shared_gen, ctx->pos); + doutc(cl, "%p %llx.%llx v%u at %llx\n", dir, ceph_vinop(dir), + (unsigned)shared_gen, ctx->pos); /* search start position */ if (ctx->pos > 2) { @@ -221,7 +227,8 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, dput(dentry); } - dout("__dcache_readdir %p cache idx %llu\n", dir, idx); + doutc(cl, "%p %llx.%llx cache idx %llu\n", dir, + ceph_vinop(dir), idx); } @@ -257,8 +264,8 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, spin_unlock(&dentry->d_lock); if (emit_dentry) { - dout(" %llx dentry %p %pd %p\n", di->offset, - dentry, dentry, d_inode(dentry)); + doutc(cl, " %llx dentry %p %pd %p\n", di->offset, + dentry, dentry, d_inode(dentry)); ctx->pos = di->offset; if (!dir_emit(ctx, dentry->d_name.name, dentry->d_name.len, ceph_present_inode(d_inode(dentry)), @@ -281,7 +288,8 @@ out: if (last) { int ret; di = ceph_dentry(last); - ret = note_last_dentry(dfi, last->d_name.name, last->d_name.len, + ret = note_last_dentry(fsc, dfi, last->d_name.name, + last->d_name.len, fpos_off(di->offset) + 1); if (ret < 0) err = ret; @@ -310,20 +318,23 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) struct ceph_dir_file_info *dfi = file->private_data; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *cl = fsc->client; int i; int err; unsigned frag = -1; struct ceph_mds_reply_info_parsed *rinfo; - dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos); + doutc(cl, "%p %llx.%llx file %p pos
%llx\n", inode, + ceph_vinop(inode), file, ctx->pos); if (dfi->file_info.flags & CEPH_F_ATEND) return 0; /* always start with . and .. */ if (ctx->pos == 0) { - dout("readdir off 0 -> '.'\n"); + doutc(cl, "%p %llx.%llx off 0 -> '.'\n", inode, + ceph_vinop(inode)); if (!dir_emit(ctx, ".", 1, ceph_present_inode(inode), inode->i_mode >> 12)) return 0; @@ -337,7 +348,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ino = ceph_present_inode(dentry->d_parent->d_inode); spin_unlock(&dentry->d_lock); - dout("readdir off 1 -> '..'\n"); + doutc(cl, "%p %llx.%llx off 1 -> '..'\n", inode, + ceph_vinop(inode)); if (!dir_emit(ctx, "..", 2, ino, inode->i_mode >> 12)) return 0; ctx->pos = 2; @@ -391,8 +403,8 @@ more: frag = fpos_frag(ctx->pos); } - dout("readdir fetching %llx.%llx frag %x offset '%s'\n", - ceph_vinop(inode), frag, dfi->last_name); + doutc(cl, "fetching %p %llx.%llx frag %x offset '%s'\n", + inode, ceph_vinop(inode), frag, dfi->last_name); req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); if (IS_ERR(req)) return PTR_ERR(req); @@ -446,12 +458,12 @@ more: ceph_mdsc_put_request(req); return err; } - dout("readdir got and parsed readdir result=%d on " - "frag %x, end=%d, complete=%d, hash_order=%d\n", - err, frag, - (int)req->r_reply_info.dir_end, - (int)req->r_reply_info.dir_complete, - (int)req->r_reply_info.hash_order); + doutc(cl, "%p %llx.%llx got and parsed readdir result=%d" + "on frag %x, end=%d, complete=%d, hash_order=%d\n", + inode, ceph_vinop(inode), err, frag, + (int)req->r_reply_info.dir_end, + (int)req->r_reply_info.dir_complete, + (int)req->r_reply_info.hash_order); rinfo = &req->r_reply_info; if (le32_to_cpu(rinfo->dir_dir->frag) != frag) { @@ -481,7 +493,8 @@ more: dfi->dir_ordered_count = req->r_dir_ordered_cnt; } } else { - dout("readdir !did_prepopulate\n"); + doutc(cl, "%p %llx.%llx !did_prepopulate\n", inode, + ceph_vinop(inode)); /* disable readdir cache */ dfi->readdir_cache_idx = -1; /* preclude from marking dir complete */ @@ -494,8 +507,8 @@ more: rinfo->dir_entries + (rinfo->dir_nr-1); unsigned next_offset = req->r_reply_info.dir_end ? 2 : (fpos_off(rde->offset) + 1); - err = note_last_dentry(dfi, rde->name, rde->name_len, - next_offset); + err = note_last_dentry(fsc, dfi, rde->name, + rde->name_len, next_offset); if (err) { ceph_mdsc_put_request(dfi->last_readdir); dfi->last_readdir = NULL; @@ -508,9 +521,9 @@ more: } rinfo = &dfi->last_readdir->r_reply_info; - dout("readdir frag %x num %d pos %llx chunk first %llx\n", - dfi->frag, rinfo->dir_nr, ctx->pos, - rinfo->dir_nr ? rinfo->dir_entries[0].offset : 0LL); + doutc(cl, "%p %llx.%llx frag %x num %d pos %llx chunk first %llx\n", + inode, ceph_vinop(inode), dfi->frag, rinfo->dir_nr, ctx->pos, + rinfo->dir_nr ? 
rinfo->dir_entries[0].offset : 0LL); i = 0; /* search start position */ @@ -530,8 +543,9 @@ more: struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; if (rde->offset < ctx->pos) { - pr_warn("%s: rde->offset 0x%llx ctx->pos 0x%llx\n", - __func__, rde->offset, ctx->pos); + pr_warn_client(cl, + "%p %llx.%llx rde->offset 0x%llx ctx->pos 0x%llx\n", + inode, ceph_vinop(inode), rde->offset, ctx->pos); return -EIO; } @@ -539,9 +553,9 @@ more: return -EIO; ctx->pos = rde->offset; - dout("readdir (%d/%d) -> %llx '%.*s' %p\n", - i, rinfo->dir_nr, ctx->pos, - rde->name_len, rde->name, &rde->inode.in); + doutc(cl, "%p %llx.%llx (%d/%d) -> %llx '%.*s' %p\n", inode, + ceph_vinop(inode), i, rinfo->dir_nr, ctx->pos, + rde->name_len, rde->name, &rde->inode.in); if (!dir_emit(ctx, rde->name, rde->name_len, ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)), @@ -552,7 +566,7 @@ more: * doesn't have enough memory, etc. So for next readdir * it will continue. */ - dout("filldir stopping us...\n"); + doutc(cl, "filldir stopping us...\n"); return 0; } @@ -583,7 +597,8 @@ more: kfree(dfi->last_name); dfi->last_name = NULL; } - dout("readdir next frag is %x\n", frag); + doutc(cl, "%p %llx.%llx next frag is %x\n", inode, + ceph_vinop(inode), frag); goto more; } dfi->file_info.flags |= CEPH_F_ATEND; @@ -598,20 +613,23 @@ more: spin_lock(&ci->i_ceph_lock); if (dfi->dir_ordered_count == atomic64_read(&ci->i_ordered_count)) { - dout(" marking %p complete and ordered\n", inode); + doutc(cl, " marking %p %llx.%llx complete and ordered\n", + inode, ceph_vinop(inode)); /* use i_size to track number of entries in * readdir cache */ BUG_ON(dfi->readdir_cache_idx < 0); i_size_write(inode, dfi->readdir_cache_idx * sizeof(struct dentry*)); } else { - dout(" marking %p complete\n", inode); + doutc(cl, " marking %llx.%llx complete\n", + ceph_vinop(inode)); } __ceph_dir_set_complete(ci, dfi->dir_release_count, dfi->dir_ordered_count); spin_unlock(&ci->i_ceph_lock); } - dout("readdir %p file %p done.\n", inode, file); + doutc(cl, "%p %llx.%llx file %p done.\n", inode, ceph_vinop(inode), + file); return 0; } @@ -657,6 +675,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) { struct ceph_dir_file_info *dfi = file->private_data; struct inode *inode = file->f_mapping->host; + struct ceph_client *cl = ceph_inode_to_client(inode); loff_t retval; inode_lock(inode); @@ -676,7 +695,8 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) if (offset >= 0) { if (need_reset_readdir(dfi, offset)) { - dout("dir_llseek dropping %p content\n", file); + doutc(cl, "%p %llx.%llx dropping %p content\n", + inode, ceph_vinop(inode), file); reset_readdir(dfi); } else if (is_hash_order(offset) && offset > file->f_pos) { /* for hash offset, we don't know if a forward seek @@ -703,8 +723,9 @@ out: struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req, struct dentry *dentry) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dentry->d_sb); struct inode *parent = d_inode(dentry->d_parent); /* we hold i_rwsem */ + struct ceph_client *cl = ceph_inode_to_client(parent); /* .snap dir? */ if (ceph_snap(parent) == CEPH_NOSNAP && @@ -713,8 +734,9 @@ struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req, struct inode *inode = ceph_get_snapdir(parent); res = d_splice_alias(inode, dentry); - dout("ENOENT on snapdir %p '%pd', linking to snapdir %p. 
Spliced dentry %p\n", - dentry, dentry, inode, res); + doutc(cl, "ENOENT on snapdir %p '%pd', linking to " + "snapdir %p %llx.%llx. Spliced dentry %p\n", + dentry, dentry, inode, ceph_vinop(inode), res); if (res) dentry = res; } @@ -735,12 +757,15 @@ struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req, struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, struct dentry *dentry, int err) { + struct ceph_client *cl = req->r_mdsc->fsc->client; + if (err == -ENOENT) { /* no trace? */ err = 0; if (!req->r_reply_info.head->is_dentry) { - dout("ENOENT and no trace, dentry %p inode %p\n", - dentry, d_inode(dentry)); + doutc(cl, + "ENOENT and no trace, dentry %p inode %llx.%llx\n", + dentry, ceph_vinop(d_inode(dentry))); if (d_really_is_positive(dentry)) { d_drop(dentry); err = -ENOENT; @@ -771,15 +796,16 @@ static bool is_root_ceph_dentry(struct inode *inode, struct dentry *dentry) static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dir->i_sb); struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); + struct ceph_client *cl = fsc->client; struct ceph_mds_request *req; int op; int mask; int err; - dout("lookup %p dentry %p '%pd'\n", - dir, dentry, dentry); + doutc(cl, "%p %llx.%llx/'%pd' dentry %p\n", dir, ceph_vinop(dir), + dentry, dentry); if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); @@ -802,7 +828,8 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, struct ceph_dentry_info *di = ceph_dentry(dentry); spin_lock(&ci->i_ceph_lock); - dout(" dir %p flags are 0x%lx\n", dir, ci->i_ceph_flags); + doutc(cl, " dir %llx.%llx flags are 0x%lx\n", + ceph_vinop(dir), ci->i_ceph_flags); if (strncmp(dentry->d_name.name, fsc->mount_options->snapdir_name, dentry->d_name.len) && @@ -812,7 +839,8 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, __ceph_caps_issued_mask_metric(ci, CEPH_CAP_FILE_SHARED, 1)) { __ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_RD); spin_unlock(&ci->i_ceph_lock); - dout(" dir %p complete, -ENOENT\n", dir); + doutc(cl, " dir %llx.%llx complete, -ENOENT\n", + ceph_vinop(dir)); d_add(dentry, NULL); di->lease_shared_gen = atomic_read(&ci->i_shared_gen); return NULL; @@ -850,7 +878,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, } dentry = ceph_finish_lookup(req, dentry, err); ceph_mdsc_put_request(req); /* will dput(dentry) */ - dout("lookup result=%p\n", dentry); + doutc(cl, "result=%p\n", dentry); return dentry; } @@ -885,6 +913,7 @@ static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; struct ceph_acl_sec_ctx as_ctx = {}; int err; @@ -901,8 +930,8 @@ static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir, goto out; } - dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n", - dir, dentry, mode, rdev); + doutc(cl, "%p %llx.%llx/'%pd' dentry %p mode 0%ho rdev %d\n", + dir, ceph_vinop(dir), dentry, dentry, mode, rdev); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -924,6 +953,7 @@ static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir, req->r_parent = dir; ihold(dir); set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); + 
req->r_mnt_idmap = mnt_idmap_get(idmap); req->r_args.mknod.mode = cpu_to_le32(mode); req->r_args.mknod.rdev = cpu_to_le32(rdev); req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL | @@ -993,6 +1023,7 @@ static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *dest) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; struct ceph_acl_sec_ctx as_ctx = {}; umode_t mode = S_IFLNK | 0777; @@ -1010,7 +1041,8 @@ static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir, goto out; } - dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest); + doutc(cl, "%p %llx.%llx/'%pd' to '%s'\n", dir, ceph_vinop(dir), dentry, + dest); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1040,6 +1072,7 @@ static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir, } set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); + req->r_mnt_idmap = mnt_idmap_get(idmap); req->r_dentry = dget(dentry); req->r_num_caps = 2; req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL | @@ -1064,6 +1097,7 @@ static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; struct ceph_acl_sec_ctx as_ctx = {}; int err; @@ -1076,10 +1110,11 @@ static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir, if (ceph_snap(dir) == CEPH_SNAPDIR) { /* mkdir .snap/foo is a MKSNAP */ op = CEPH_MDS_OP_MKSNAP; - dout("mksnap dir %p snap '%pd' dn %p\n", dir, - dentry, dentry); + doutc(cl, "mksnap %llx.%llx/'%pd' dentry %p\n", + ceph_vinop(dir), dentry, dentry); } else if (ceph_snap(dir) == CEPH_NOSNAP) { - dout("mkdir dir %p dn %p mode 0%ho\n", dir, dentry, mode); + doutc(cl, "mkdir %llx.%llx/'%pd' dentry %p mode 0%ho\n", + ceph_vinop(dir), dentry, dentry, mode); op = CEPH_MDS_OP_MKDIR; } else { err = -EROFS; @@ -1117,6 +1152,8 @@ static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir, req->r_parent = dir; ihold(dir); set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); + if (op == CEPH_MDS_OP_MKDIR) + req->r_mnt_idmap = mnt_idmap_get(idmap); req->r_args.mkdir.mode = cpu_to_le32(mode); req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL | CEPH_CAP_XATTR_EXCL; @@ -1144,6 +1181,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; int err; @@ -1161,8 +1199,8 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, if (err) return err; - dout("link in dir %p %llx.%llx old_dentry %p:'%pd' dentry %p:'%pd'\n", - dir, ceph_vinop(dir), old_dentry, old_dentry, dentry, dentry); + doutc(cl, "%p %llx.%llx/'%pd' to '%pd'\n", dir, ceph_vinop(dir), + old_dentry, dentry); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS); if (IS_ERR(req)) { d_drop(dentry); @@ -1199,14 +1237,16 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { struct dentry *dentry = req->r_dentry; - struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dentry->d_sb); + struct ceph_client *cl = fsc->client; struct ceph_dentry_info *di = 
ceph_dentry(dentry); int result = req->r_err ? req->r_err : le32_to_cpu(req->r_reply_info.head->result); if (!test_bit(CEPH_DENTRY_ASYNC_UNLINK_BIT, &di->flags)) - pr_warn("%s dentry %p:%pd async unlink bit is not set\n", - __func__, dentry, dentry); + pr_warn_client(cl, + "dentry %p:%pd async unlink bit is not set\n", + dentry, dentry); spin_lock(&fsc->async_unlink_conflict_lock); hash_del_rcu(&di->hnode); @@ -1226,7 +1266,7 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc, if (result) { int pathlen = 0; u64 base = 0; - char *path = ceph_mdsc_build_path(dentry, &pathlen, + char *path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &base, 0); /* mark error on parent + clear complete */ @@ -1240,8 +1280,8 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc, /* mark inode itself for an error (since metadata is bogus) */ mapping_set_error(req->r_old_inode->i_mapping, result); - pr_warn("async unlink failure path=(%llx)%s result=%d!\n", - base, IS_ERR(path) ? "<<bad>>" : path, result); + pr_warn_client(cl, "failure path=(%llx)%s result=%d!\n", + base, IS_ERR(path) ? "<<bad>>" : path, result); ceph_mdsc_free_path(path, pathlen); } out: @@ -1290,7 +1330,8 @@ static int get_caps_for_async_unlink(struct inode *dir, struct dentry *dentry) */ static int ceph_unlink(struct inode *dir, struct dentry *dentry) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dir->i_sb); + struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; struct inode *inode = d_inode(dentry); struct ceph_mds_request *req; @@ -1300,11 +1341,12 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) if (ceph_snap(dir) == CEPH_SNAPDIR) { /* rmdir .snap/foo is RMSNAP */ - dout("rmsnap dir %p '%pd' dn %p\n", dir, dentry, dentry); + doutc(cl, "rmsnap %llx.%llx/'%pd' dn\n", ceph_vinop(dir), + dentry); op = CEPH_MDS_OP_RMSNAP; } else if (ceph_snap(dir) == CEPH_NOSNAP) { - dout("unlink/rmdir dir %p dn %p inode %p\n", - dir, dentry, inode); + doutc(cl, "unlink/rmdir %llx.%llx/'%pd' inode %llx.%llx\n", + ceph_vinop(dir), dentry, ceph_vinop(inode)); op = d_is_dir(dentry) ? 
CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK; } else @@ -1327,9 +1369,9 @@ retry: (req->r_dir_caps = get_caps_for_async_unlink(dir, dentry))) { struct ceph_dentry_info *di = ceph_dentry(dentry); - dout("async unlink on %llu/%.*s caps=%s", ceph_ino(dir), - dentry->d_name.len, dentry->d_name.name, - ceph_cap_string(req->r_dir_caps)); + doutc(cl, "async unlink on %llx.%llx/'%pd' caps=%s", + ceph_vinop(dir), dentry, + ceph_cap_string(req->r_dir_caps)); set_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags); req->r_callback = ceph_async_unlink_cb; req->r_old_inode = d_inode(dentry); @@ -1384,6 +1426,7 @@ static int ceph_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *new_dentry, unsigned int flags) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old_dir->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; int op = CEPH_MDS_OP_RENAME; int err; @@ -1413,8 +1456,9 @@ static int ceph_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (err) return err; - dout("rename dir %p dentry %p to dir %p dentry %p\n", - old_dir, old_dentry, new_dir, new_dentry); + doutc(cl, "%llx.%llx/'%pd' to %llx.%llx/'%pd'\n", + ceph_vinop(old_dir), old_dentry, ceph_vinop(new_dir), + new_dentry); req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); if (IS_ERR(req)) return PTR_ERR(req); @@ -1459,9 +1503,10 @@ static int ceph_rename(struct mnt_idmap *idmap, struct inode *old_dir, void __ceph_dentry_lease_touch(struct ceph_dentry_info *di) { struct dentry *dn = di->dentry; - struct ceph_mds_client *mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(dn->d_sb)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; - dout("dentry_lease_touch %p %p '%pd'\n", di, dn, dn); + doutc(cl, "%p %p '%pd'\n", di, dn, dn); di->flags |= CEPH_DENTRY_LEASE_LIST; if (di->flags & CEPH_DENTRY_SHRINK_LIST) { @@ -1469,7 +1514,6 @@ void __ceph_dentry_lease_touch(struct ceph_dentry_info *di) return; } - mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; spin_lock(&mdsc->dentry_list_lock); list_move_tail(&di->lease_list, &mdsc->dentry_leases); spin_unlock(&mdsc->dentry_list_lock); @@ -1493,10 +1537,10 @@ static void __dentry_dir_lease_touch(struct ceph_mds_client* mdsc, void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di) { struct dentry *dn = di->dentry; - struct ceph_mds_client *mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(dn->d_sb)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; - dout("dentry_dir_lease_touch %p %p '%pd' (offset 0x%llx)\n", - di, dn, dn, di->offset); + doutc(cl, "%p %p '%pd' (offset 0x%llx)\n", di, dn, dn, di->offset); if (!list_empty(&di->lease_list)) { if (di->flags & CEPH_DENTRY_LEASE_LIST) { @@ -1516,7 +1560,6 @@ void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di) return; } - mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; spin_lock(&mdsc->dentry_list_lock); __dentry_dir_lease_touch(mdsc, di); spin_unlock(&mdsc->dentry_list_lock); @@ -1530,7 +1573,7 @@ static void __dentry_lease_unlist(struct ceph_dentry_info *di) if (list_empty(&di->lease_list)) return; - mdsc = ceph_sb_to_client(di->dentry->d_sb)->mdsc; + mdsc = ceph_sb_to_fs_client(di->dentry->d_sb)->mdsc; spin_lock(&mdsc->dentry_list_lock); list_del_init(&di->lease_list); spin_unlock(&mdsc->dentry_list_lock); @@ -1757,6 +1800,8 @@ static int dentry_lease_is_valid(struct dentry *dentry, unsigned int flags) { struct ceph_dentry_info *di; struct ceph_mds_session *session = NULL; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(dentry->d_sb)->mdsc; + struct ceph_client *cl =
mdsc->fsc->client; u32 seq = 0; int valid = 0; @@ -1789,7 +1834,7 @@ static int dentry_lease_is_valid(struct dentry *dentry, unsigned int flags) CEPH_MDS_LEASE_RENEW, seq); ceph_put_mds_session(session); } - dout("dentry_lease_is_valid - dentry %p = %d\n", dentry, valid); + doutc(cl, "dentry %p = %d\n", dentry, valid); return valid; } @@ -1832,6 +1877,7 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry, struct ceph_mds_client *mdsc) { struct ceph_inode_info *ci = ceph_inode(dir); + struct ceph_client *cl = mdsc->fsc->client; int valid; int shared_gen; @@ -1853,8 +1899,9 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry, valid = 0; spin_unlock(&dentry->d_lock); } - dout("dir_lease_is_valid dir %p v%u dentry %p = %d\n", - dir, (unsigned)atomic_read(&ci->i_shared_gen), dentry, valid); + doutc(cl, "dir %p %llx.%llx v%u dentry %p '%pd' = %d\n", dir, + ceph_vinop(dir), (unsigned)atomic_read(&ci->i_shared_gen), + dentry, dentry, valid); return valid; } @@ -1863,10 +1910,11 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry, */ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) { + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(dentry->d_sb)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; int valid = 0; struct dentry *parent; struct inode *dir, *inode; - struct ceph_mds_client *mdsc; valid = fscrypt_d_revalidate(dentry, flags); if (valid <= 0) @@ -1884,16 +1932,16 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) inode = d_inode(dentry); } - dout("d_revalidate %p '%pd' inode %p offset 0x%llx nokey %d\n", dentry, - dentry, inode, ceph_dentry(dentry)->offset, - !!(dentry->d_flags & DCACHE_NOKEY_NAME)); + doutc(cl, "%p '%pd' inode %p offset 0x%llx nokey %d\n", + dentry, dentry, inode, ceph_dentry(dentry)->offset, + !!(dentry->d_flags & DCACHE_NOKEY_NAME)); - mdsc = ceph_sb_to_client(dir->i_sb)->mdsc; + mdsc = ceph_sb_to_fs_client(dir->i_sb)->mdsc; /* always trust cached snapped dentries, snapdir dentry */ if (ceph_snap(dir) != CEPH_NOSNAP) { - dout("d_revalidate %p '%pd' inode %p is SNAPPED\n", dentry, - dentry, inode); + doutc(cl, "%p '%pd' inode %p is SNAPPED\n", dentry, + dentry, inode); valid = 1; } else if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { valid = 1; @@ -1948,14 +1996,14 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) break; } ceph_mdsc_put_request(req); - dout("d_revalidate %p lookup result=%d\n", - dentry, err); + doutc(cl, "%p '%pd', lookup result=%d\n", dentry, + dentry, err); } } else { percpu_counter_inc(&mdsc->metric.d_lease_hit); } - dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid"); + doutc(cl, "%p '%pd' %s\n", dentry, dentry, valid ? 
"valid" : "invalid"); if (!valid) ceph_dir_clear_complete(dir); @@ -1995,9 +2043,9 @@ static int ceph_d_delete(const struct dentry *dentry) static void ceph_d_release(struct dentry *dentry) { struct ceph_dentry_info *di = ceph_dentry(dentry); - struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dentry->d_sb); - dout("d_release %p\n", dentry); + doutc(fsc->client, "dentry %p '%pd'\n", dentry, dentry); atomic64_dec(&fsc->mdsc->metric.total_dentries); @@ -2018,10 +2066,12 @@ static void ceph_d_release(struct dentry *dentry) */ static void ceph_d_prune(struct dentry *dentry) { + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dentry->d_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *dir_ci; struct ceph_dentry_info *di; - dout("ceph_d_prune %pd %p\n", dentry, dentry); + doutc(cl, "dentry %p '%pd'\n", dentry, dentry); /* do we have a valid parent? */ if (IS_ROOT(dentry)) @@ -2064,7 +2114,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, int left; const int bufsize = 1024; - if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) + if (!ceph_test_mount_opt(ceph_sb_to_fs_client(inode->i_sb), DIRSTAT)) return -EISDIR; if (!dfi->dir_info) { diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 8559990a59a5..726af69d4d62 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -36,6 +36,7 @@ struct ceph_nfs_snapfh { static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len, struct inode *parent_inode) { + struct ceph_client *cl = ceph_inode_to_client(inode); static const int snap_handle_length = sizeof(struct ceph_nfs_snapfh) >> 2; struct ceph_nfs_snapfh *sfh = (void *)rawfh; @@ -79,13 +80,14 @@ static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len, *max_len = snap_handle_length; ret = FILEID_BTRFS_WITH_PARENT; out: - dout("encode_snapfh %llx.%llx ret=%d\n", ceph_vinop(inode), ret); + doutc(cl, "%p %llx.%llx ret=%d\n", inode, ceph_vinop(inode), ret); return ret; } static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, struct inode *parent_inode) { + struct ceph_client *cl = ceph_inode_to_client(inode); static const int handle_length = sizeof(struct ceph_nfs_fh) >> 2; static const int connected_handle_length = @@ -105,15 +107,15 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, if (parent_inode) { struct ceph_nfs_confh *cfh = (void *)rawfh; - dout("encode_fh %llx with parent %llx\n", - ceph_ino(inode), ceph_ino(parent_inode)); + doutc(cl, "%p %llx.%llx with parent %p %llx.%llx\n", inode, + ceph_vinop(inode), parent_inode, ceph_vinop(parent_inode)); cfh->ino = ceph_ino(inode); cfh->parent_ino = ceph_ino(parent_inode); *max_len = connected_handle_length; type = FILEID_INO32_GEN_PARENT; } else { struct ceph_nfs_fh *fh = (void *)rawfh; - dout("encode_fh %llx\n", ceph_ino(inode)); + doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode)); fh->ino = ceph_ino(inode); *max_len = handle_length; type = FILEID_INO32_GEN; @@ -123,7 +125,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, static struct inode *__lookup_inode(struct super_block *sb, u64 ino) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(sb)->mdsc; struct inode *inode; struct ceph_vino vino; int err; @@ -205,7 +207,8 @@ static struct dentry *__snapfh_to_dentry(struct super_block *sb, struct ceph_nfs_snapfh *sfh, bool want_parent) { - struct 
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 8559990a59a5..726af69d4d62 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -36,6 +36,7 @@ struct ceph_nfs_snapfh { static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len, struct inode *parent_inode) { + struct ceph_client *cl = ceph_inode_to_client(inode); static const int snap_handle_length = sizeof(struct ceph_nfs_snapfh) >> 2; struct ceph_nfs_snapfh *sfh = (void *)rawfh; @@ -79,13 +80,14 @@ static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len, *max_len = snap_handle_length; ret = FILEID_BTRFS_WITH_PARENT; out: - dout("encode_snapfh %llx.%llx ret=%d\n", ceph_vinop(inode), ret); + doutc(cl, "%p %llx.%llx ret=%d\n", inode, ceph_vinop(inode), ret); return ret; } static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, struct inode *parent_inode) { + struct ceph_client *cl = ceph_inode_to_client(inode); static const int handle_length = sizeof(struct ceph_nfs_fh) >> 2; static const int connected_handle_length = @@ -105,15 +107,15 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, if (parent_inode) { struct ceph_nfs_confh *cfh = (void *)rawfh; - dout("encode_fh %llx with parent %llx\n", - ceph_ino(inode), ceph_ino(parent_inode)); + doutc(cl, "%p %llx.%llx with parent %p %llx.%llx\n", inode, + ceph_vinop(inode), parent_inode, ceph_vinop(parent_inode)); cfh->ino = ceph_ino(inode); cfh->parent_ino = ceph_ino(parent_inode); *max_len = connected_handle_length; type = FILEID_INO32_GEN_PARENT; } else { struct ceph_nfs_fh *fh = (void *)rawfh; - dout("encode_fh %llx\n", ceph_ino(inode)); + doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode)); fh->ino = ceph_ino(inode); *max_len = handle_length; type = FILEID_INO32_GEN; @@ -123,7 +125,7 @@ static struct inode *__lookup_inode(struct super_block *sb, u64 ino) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(sb)->mdsc; struct inode *inode; struct ceph_vino vino; int err; @@ -205,7 +207,8 @@ static struct dentry *__snapfh_to_dentry(struct super_block *sb, struct ceph_nfs_snapfh *sfh, bool want_parent) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(sb)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; struct inode *inode; struct ceph_vino vino; @@ -278,11 +281,10 @@ static struct dentry *__snapfh_to_dentry(struct super_block *sb, ceph_mdsc_put_request(req); if (want_parent) { - dout("snapfh_to_parent %llx.%llx\n err=%d\n", - vino.ino, vino.snap, err); + doutc(cl, "%llx.%llx\n err=%d\n", vino.ino, vino.snap, err); } else { - dout("snapfh_to_dentry %llx.%llx parent %llx hash %x err=%d", - vino.ino, vino.snap, sfh->parent_ino, sfh->hash, err); + doutc(cl, "%llx.%llx parent %llx hash %x err=%d", vino.ino, + vino.snap, sfh->parent_ino, sfh->hash, err); } if (IS_ERR(inode)) return ERR_CAST(inode); @@ -297,6 +299,7 @@ static struct dentry *ceph_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); struct ceph_nfs_fh *fh = (void *)fid->raw; if (fh_type == FILEID_BTRFS_WITH_PARENT) { @@ -310,14 +313,14 @@ static struct dentry *ceph_fh_to_dentry(struct super_block *sb, if (fh_len < sizeof(*fh) / 4) return NULL; - dout("fh_to_dentry %llx\n", fh->ino); + doutc(fsc->client, "%llx\n", fh->ino); return __fh_to_dentry(sb, fh->ino); } static struct dentry *__get_parent(struct super_block *sb, struct dentry *child, u64 ino) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(sb)->mdsc; struct ceph_mds_request *req; struct inode *inode; int mask; @@ -363,6 +366,7 @@ static struct dentry *__get_parent(struct super_block *sb, static struct dentry *ceph_get_parent(struct dentry *child) { struct inode *inode = d_inode(child); + struct ceph_client *cl = ceph_inode_to_client(inode); struct dentry *dn; if (ceph_snap(inode) != CEPH_NOSNAP) { @@ -402,8 +406,8 @@ static struct dentry *ceph_get_parent(struct dentry *child) dn = __get_parent(child->d_sb, child, 0); } out: - dout("get_parent %p ino %llx.%llx err=%ld\n", - child, ceph_vinop(inode), (long)PTR_ERR_OR_ZERO(dn)); + doutc(cl, "child %p %p %llx.%llx err=%ld\n", child, inode, + ceph_vinop(inode), (long)PTR_ERR_OR_ZERO(dn)); return dn; } @@ -414,6 +418,7 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); struct ceph_nfs_confh *cfh = (void *)fid->raw; struct dentry *dentry; @@ -427,7 +432,7 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, if (fh_len < sizeof(*cfh) / 4) return NULL; - dout("fh_to_parent %llx\n", cfh->parent_ino); + doutc(fsc->client, "%llx\n", cfh->parent_ino); dentry = __get_parent(sb, NULL, cfh->ino); if (unlikely(dentry == ERR_PTR(-ENOENT))) dentry = __fh_to_dentry(sb, cfh->parent_ino); @@ -439,7 +444,7 @@ static int __get_snap_name(struct dentry *parent, char *name, { struct inode *inode = d_inode(child); struct inode *dir = d_inode(parent); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_mds_request *req = NULL; char *last_name = NULL; unsigned next_offset = 2; @@ -526,8 +531,8 @@ out: if (req) ceph_mdsc_put_request(req); kfree(last_name); - dout("get_snap_name %p ino %llx.%llx err=%d\n", - child, ceph_vinop(inode), err); + doutc(fsc->client, "child dentry %p %p %llx.%llx err=%d\n", child, + inode, ceph_vinop(inode), err); return err; } @@ -544,7 +549,7 @@ static int
ceph_get_name(struct dentry *parent, char *name, if (ceph_snap(inode) != CEPH_NOSNAP) return __get_snap_name(parent, name, child); - mdsc = ceph_inode_to_client(inode)->mdsc; + mdsc = ceph_inode_to_fs_client(inode)->mdsc; req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME, USE_ANY_MDS); if (IS_ERR(req)) @@ -588,9 +593,9 @@ static int ceph_get_name(struct dentry *parent, char *name, ceph_fname_free_buffer(dir, &oname); } out: - dout("get_name %p ino %llx.%llx err %d %s%s\n", - child, ceph_vinop(inode), err, - err ? "" : "name ", err ? "" : name); + doutc(mdsc->fsc->client, "child dentry %p %p %llx.%llx err %d %s%s\n", + child, inode, ceph_vinop(inode), err, err ? "" : "name ", + err ? "" : name); ceph_mdsc_put_request(req); return err; } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 649600d0a7b6..3b5aae29e944 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -19,8 +19,9 @@ #include "io.h" #include "metric.h" -static __le32 ceph_flags_sys2wire(u32 flags) +static __le32 ceph_flags_sys2wire(struct ceph_mds_client *mdsc, u32 flags) { + struct ceph_client *cl = mdsc->fsc->client; u32 wire_flags = 0; switch (flags & O_ACCMODE) { @@ -48,7 +49,7 @@ static __le32 ceph_flags_sys2wire(u32 flags) #undef ceph_sys2wire if (flags) - dout("unused open flags: %x\n", flags); + doutc(cl, "unused open flags: %x\n", flags); return cpu_to_le32(wire_flags); } @@ -189,7 +190,7 @@ prepare_open_request(struct super_block *sb, int flags, int create_mode) if (IS_ERR(req)) goto out; req->r_fmode = ceph_flags_to_mode(flags); - req->r_args.open.flags = ceph_flags_sys2wire(flags); + req->r_args.open.flags = ceph_flags_sys2wire(mdsc, flags); req->r_args.open.mode = cpu_to_le32(create_mode); out: return req; @@ -200,12 +201,13 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mount_options *opt = - ceph_inode_to_client(&ci->netfs.inode)->mount_options; + ceph_inode_to_fs_client(&ci->netfs.inode)->mount_options; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_file_info *fi; int ret; - dout("%s %p %p 0%o (%s)\n", __func__, inode, file, - inode->i_mode, isdir ? "dir" : "regular"); + doutc(cl, "%p %llx.%llx %p 0%o (%s)\n", inode, ceph_vinop(inode), + file, inode->i_mode, isdir ? 
"dir" : "regular"); BUG_ON(inode->i_fop->release != ceph_release); if (isdir) { @@ -234,7 +236,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, spin_lock_init(&fi->rw_contexts_lock); INIT_LIST_HEAD(&fi->rw_contexts); - fi->filp_gen = READ_ONCE(ceph_inode_to_client(inode)->filp_gen); + fi->filp_gen = READ_ONCE(ceph_inode_to_fs_client(inode)->filp_gen); if ((file->f_mode & FMODE_WRITE) && ceph_has_inline_data(ci)) { ret = ceph_uninline_data(file); @@ -259,6 +261,7 @@ error: */ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) { + struct ceph_client *cl = ceph_inode_to_client(inode); int ret = 0; switch (inode->i_mode & S_IFMT) { @@ -271,13 +274,13 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) break; case S_IFLNK: - dout("init_file %p %p 0%o (symlink)\n", inode, file, - inode->i_mode); + doutc(cl, "%p %llx.%llx %p 0%o (symlink)\n", inode, + ceph_vinop(inode), file, inode->i_mode); break; default: - dout("init_file %p %p 0%o (special)\n", inode, file, - inode->i_mode); + doutc(cl, "%p %llx.%llx %p 0%o (special)\n", inode, + ceph_vinop(inode), file, inode->i_mode); /* * we need to drop the open ref now, since we don't * have .release set to ceph_release. @@ -296,6 +299,7 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) int ceph_renew_caps(struct inode *inode, int fmode) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req; int err, flags, wanted; @@ -307,8 +311,9 @@ int ceph_renew_caps(struct inode *inode, int fmode) (!(wanted & CEPH_CAP_ANY_WR) || ci->i_auth_cap)) { int issued = __ceph_caps_issued(ci, NULL); spin_unlock(&ci->i_ceph_lock); - dout("renew caps %p want %s issued %s updating mds_wanted\n", - inode, ceph_cap_string(wanted), ceph_cap_string(issued)); + doutc(cl, "%p %llx.%llx want %s issued %s updating mds_wanted\n", + inode, ceph_vinop(inode), ceph_cap_string(wanted), + ceph_cap_string(issued)); ceph_check_caps(ci, 0); return 0; } @@ -339,7 +344,8 @@ int ceph_renew_caps(struct inode *inode, int fmode) err = ceph_mdsc_do_request(mdsc, NULL, req); ceph_mdsc_put_request(req); out: - dout("renew caps %p open result=%d\n", inode, err); + doutc(cl, "%p %llx.%llx open result=%d\n", inode, ceph_vinop(inode), + err); return err < 0 ? 
err : 0; } @@ -352,7 +358,8 @@ out: int ceph_open(struct inode *inode, struct file *file) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(inode->i_sb); + struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; struct ceph_file_info *fi = file->private_data; @@ -360,7 +367,7 @@ int ceph_open(struct inode *inode, struct file *file) int flags, fmode, wanted; if (fi) { - dout("open file %p is already opened\n", file); + doutc(cl, "file %p is already opened\n", file); return 0; } @@ -374,8 +381,8 @@ int ceph_open(struct inode *inode, struct file *file) return err; } - dout("open inode %p ino %llx.%llx file %p flags %d (%d)\n", inode, - ceph_vinop(inode), file, flags, file->f_flags); + doutc(cl, "%p %llx.%llx file %p flags %d (%d)\n", inode, + ceph_vinop(inode), file, flags, file->f_flags); fmode = ceph_flags_to_mode(flags); wanted = ceph_caps_for_mode(fmode); @@ -399,9 +406,9 @@ int ceph_open(struct inode *inode, struct file *file) int mds_wanted = __ceph_caps_mds_wanted(ci, true); int issued = __ceph_caps_issued(ci, NULL); - dout("open %p fmode %d want %s issued %s using existing\n", - inode, fmode, ceph_cap_string(wanted), - ceph_cap_string(issued)); + doutc(cl, "open %p fmode %d want %s issued %s using existing\n", + inode, fmode, ceph_cap_string(wanted), + ceph_cap_string(issued)); __ceph_touch_fmode(ci, mdsc, fmode); spin_unlock(&ci->i_ceph_lock); @@ -421,7 +428,7 @@ int ceph_open(struct inode *inode, struct file *file) spin_unlock(&ci->i_ceph_lock); - dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted)); + doutc(cl, "open fmode %d wants %s\n", fmode, ceph_cap_string(wanted)); req = prepare_open_request(inode->i_sb, flags, 0); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -435,7 +442,7 @@ int ceph_open(struct inode *inode, struct file *file) if (!err) err = ceph_init_file(inode, file, req->r_fmode); ceph_mdsc_put_request(req); - dout("open result=%d on %llx.%llx\n", err, ceph_vinop(inode)); + doutc(cl, "open result=%d on %llx.%llx\n", err, ceph_vinop(inode)); out: return err; } @@ -515,6 +522,7 @@ no_async: static void restore_deleg_ino(struct inode *dir, u64 ino) { + struct ceph_client *cl = ceph_inode_to_client(dir); struct ceph_inode_info *ci = ceph_inode(dir); struct ceph_mds_session *s = NULL; @@ -525,7 +533,8 @@ static void restore_deleg_ino(struct inode *dir, u64 ino) if (s) { int err = ceph_restore_deleg_ino(s, ino); if (err) - pr_warn("ceph: unable to restore delegated ino 0x%llx to session: %d\n", + pr_warn_client(cl, + "unable to restore delegated ino 0x%llx to session: %d\n", ino, err); ceph_put_mds_session(s); } @@ -557,6 +566,7 @@ static void wake_async_create_waiters(struct inode *inode, static void ceph_async_create_cb(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { + struct ceph_client *cl = mdsc->fsc->client; struct dentry *dentry = req->r_dentry; struct inode *dinode = d_inode(dentry); struct inode *tinode = req->r_target_inode; @@ -574,10 +584,11 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc, if (result) { int pathlen = 0; u64 base = 0; - char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen, + char *path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen, &base, 0); - pr_warn("async create failure path=(%llx)%s result=%d!\n", + pr_warn_client(cl, + "async create failure path=(%llx)%s result=%d!\n", base, IS_ERR(path) ? 
"<<bad>>" : path, result); ceph_mdsc_free_path(path, pathlen); @@ -596,14 +607,15 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc, u64 ino = ceph_vino(tinode).ino; if (req->r_deleg_ino != ino) - pr_warn("%s: inode number mismatch! err=%d deleg_ino=0x%llx target=0x%llx\n", - __func__, req->r_err, req->r_deleg_ino, ino); + pr_warn_client(cl, + "inode number mismatch! err=%d deleg_ino=0x%llx target=0x%llx\n", + req->r_err, req->r_deleg_ino, ino); mapping_set_error(tinode->i_mapping, result); wake_async_create_waiters(tinode, req->r_session); } else if (!result) { - pr_warn("%s: no req->r_target_inode for 0x%llx\n", __func__, - req->r_deleg_ino); + pr_warn_client(cl, "no req->r_target_inode for 0x%llx\n", + req->r_deleg_ino); } out: ceph_mdsc_release_dir_caps(req); @@ -625,6 +637,7 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, struct timespec64 now; struct ceph_string *pool_ns; struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_vino vino = { .ino = req->r_deleg_ino, .snap = CEPH_NOSNAP }; @@ -655,7 +668,9 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, in.truncate_seq = cpu_to_le32(1); in.truncate_size = cpu_to_le64(-1ULL); in.xattr_version = cpu_to_le64(1); - in.uid = cpu_to_le32(from_kuid(&init_user_ns, current_fsuid())); + in.uid = cpu_to_le32(from_kuid(&init_user_ns, + mapped_fsuid(req->r_mnt_idmap, + &init_user_ns))); if (dir->i_mode & S_ISGID) { in.gid = cpu_to_le32(from_kgid(&init_user_ns, dir->i_gid)); @@ -663,7 +678,9 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, if (S_ISDIR(mode)) mode |= S_ISGID; } else { - in.gid = cpu_to_le32(from_kgid(&init_user_ns, current_fsgid())); + in.gid = cpu_to_le32(from_kgid(&init_user_ns, + mapped_fsgid(req->r_mnt_idmap, + &init_user_ns))); } in.mode = cpu_to_le32((u32)mode); @@ -683,7 +700,7 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, req->r_fmode, NULL); up_read(&mdsc->snap_rwsem); if (ret) { - dout("%s failed to fill inode: %d\n", __func__, ret); + doutc(cl, "failed to fill inode: %d\n", ret); ceph_dir_clear_complete(dir); if (!d_unhashed(dentry)) d_drop(dentry); @@ -691,8 +708,8 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, } else { struct dentry *dn; - dout("%s d_adding new inode 0x%llx to 0x%llx/%s\n", __func__, - vino.ino, ceph_ino(dir), dentry->d_name.name); + doutc(cl, "d_adding new inode 0x%llx to 0x%llx/%s\n", + vino.ino, ceph_ino(dir), dentry->d_name.name); ceph_dir_clear_ordered(dir); ceph_init_inode_acls(inode, as_ctx); if (inode->i_state & I_NEW) { @@ -730,7 +747,9 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, int ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned flags, umode_t mode) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); + struct mnt_idmap *idmap = file_mnt_idmap(file); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dir->i_sb); + struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; struct inode *new_inode = NULL; @@ -740,9 +759,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, int mask; int err; - dout("atomic_open %p dentry %p '%pd' %s flags %d mode 0%o\n", - dir, dentry, dentry, - d_unhashed(dentry) ? 
"unhashed" : "hashed", flags, mode); + doutc(cl, "%p %llx.%llx dentry %p '%pd' %s flags %d mode 0%o\n", + dir, ceph_vinop(dir), dentry, dentry, + d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode); if (dentry->d_name.len > NAME_MAX) return -ENAMETOOLONG; @@ -788,6 +807,8 @@ retry: mask |= CEPH_CAP_XATTR_SHARED; req->r_args.open.mask = cpu_to_le32(mask); req->r_parent = dir; + if (req->r_op == CEPH_MDS_OP_CREATE) + req->r_mnt_idmap = mnt_idmap_get(idmap); ihold(dir); if (IS_ENCRYPTED(dir)) { set_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags); @@ -880,17 +901,18 @@ retry: goto out_req; if (dn || d_really_is_negative(dentry) || d_is_symlink(dentry)) { /* make vfs retry on splice, ENOENT, or symlink */ - dout("atomic_open finish_no_open on dn %p\n", dn); + doutc(cl, "finish_no_open on dn %p\n", dn); err = finish_no_open(file, dn); } else { if (IS_ENCRYPTED(dir) && !fscrypt_has_permitted_context(dir, d_inode(dentry))) { - pr_warn("Inconsistent encryption context (parent %llx:%llx child %llx:%llx)\n", + pr_warn_client(cl, + "Inconsistent encryption context (parent %llx:%llx child %llx:%llx)\n", ceph_vinop(dir), ceph_vinop(d_inode(dentry))); goto out_req; } - dout("atomic_open finish_open on dn %p\n", dn); + doutc(cl, "finish_open on dn %p\n", dn); if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) { struct inode *newino = d_inode(dentry); @@ -905,17 +927,19 @@ out_req: iput(new_inode); out_ctx: ceph_release_acl_sec_ctx(&as_ctx); - dout("atomic_open result=%d\n", err); + doutc(cl, "result=%d\n", err); return err; } int ceph_release(struct inode *inode, struct file *file) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); if (S_ISDIR(inode->i_mode)) { struct ceph_dir_file_info *dfi = file->private_data; - dout("release inode %p dir file %p\n", inode, file); + doutc(cl, "%p %llx.%llx dir file %p\n", inode, + ceph_vinop(inode), file); WARN_ON(!list_empty(&dfi->file_info.rw_contexts)); ceph_put_fmode(ci, dfi->file_info.fmode, 1); @@ -927,7 +951,8 @@ int ceph_release(struct inode *inode, struct file *file) kmem_cache_free(ceph_dir_file_cachep, dfi); } else { struct ceph_file_info *fi = file->private_data; - dout("release inode %p regular file %p\n", inode, file); + doutc(cl, "%p %llx.%llx regular file %p\n", inode, + ceph_vinop(inode), file); WARN_ON(!list_empty(&fi->rw_contexts)); ceph_fscache_unuse_cookie(inode, file->f_mode & FMODE_WRITE); @@ -962,7 +987,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, u64 *last_objver) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_osd_client *osdc = &fsc->client->osdc; ssize_t ret; u64 off = *ki_pos; @@ -971,7 +997,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD); u64 objver = 0; - dout("sync_read on inode %p %llx~%llx\n", inode, *ki_pos, len); + doutc(cl, "on inode %p %llx.%llx %llx~%llx\n", inode, + ceph_vinop(inode), *ki_pos, len); if (ceph_inode_is_shutdown(inode)) return -EIO; @@ -1005,8 +1032,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, /* determine new offset/length if encrypted */ ceph_fscrypt_adjust_off_and_len(inode, &read_off, &read_len); - dout("sync_read orig %llu~%llu reading %llu~%llu", - off, len, read_off, read_len); + doutc(cl, "orig %llu~%llu reading 
%llu~%llu", off, len, + read_off, read_len); req = ceph_osdc_new_request(osdc, &ci->i_layout, ci->i_vino, read_off, &read_len, 0, 1, @@ -1059,8 +1086,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, objver = req->r_version; i_size = i_size_read(inode); - dout("sync_read %llu~%llu got %zd i_size %llu%s\n", - off, len, ret, i_size, (more ? " MORE" : "")); + doutc(cl, "%llu~%llu got %zd i_size %llu%s\n", off, len, + ret, i_size, (more ? " MORE" : "")); /* Fix it to go to end of extent map */ if (sparse && ret >= 0) @@ -1101,8 +1128,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, int zlen = min(len - ret, i_size - off - ret); int zoff = page_off + ret; - dout("sync_read zero gap %llu~%llu\n", - off + ret, off + ret + zlen); + doutc(cl, "zero gap %llu~%llu\n", off + ret, + off + ret + zlen); ceph_zero_page_vector_range(zoff, zlen, pages); ret += zlen; } @@ -1151,7 +1178,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, if (last_objver) *last_objver = objver; } - dout("sync_read result %zd retry_op %d\n", ret, *retry_op); + doutc(cl, "result %zd retry_op %d\n", ret, *retry_op); return ret; } @@ -1160,9 +1187,11 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to, { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); + struct ceph_client *cl = ceph_inode_to_client(inode); - dout("sync_read on file %p %llx~%zx %s\n", file, iocb->ki_pos, - iov_iter_count(to), (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); + doutc(cl, "on file %p %llx~%zx %s\n", file, iocb->ki_pos, + iov_iter_count(to), + (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); return __ceph_sync_read(inode, &iocb->ki_pos, to, retry_op, NULL); } @@ -1190,6 +1219,7 @@ static void ceph_aio_retry_work(struct work_struct *work); static void ceph_aio_complete(struct inode *inode, struct ceph_aio_request *aio_req) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); int ret; @@ -1203,7 +1233,7 @@ static void ceph_aio_complete(struct inode *inode, if (!ret) ret = aio_req->total_len; - dout("ceph_aio_complete %p rc %d\n", inode, ret); + doutc(cl, "%p %llx.%llx rc %d\n", inode, ceph_vinop(inode), ret); if (ret >= 0 && aio_req->write) { int dirty; @@ -1242,11 +1272,13 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req) struct ceph_client_metric *metric = &ceph_sb_to_mdsc(inode->i_sb)->metric; unsigned int len = osd_data->bvec_pos.iter.bi_size; bool sparse = (op->op == CEPH_OSD_OP_SPARSE_READ); + struct ceph_client *cl = ceph_inode_to_client(inode); BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_BVECS); BUG_ON(!osd_data->num_bvecs); - dout("ceph_aio_complete_req %p rc %d bytes %u\n", inode, rc, len); + doutc(cl, "req %p inode %p %llx.%llx, rc %d bytes %u\n", req, + inode, ceph_vinop(inode), rc, len); if (rc == -EOLDSNAPC) { struct ceph_aio_work *aio_work; @@ -1256,7 +1288,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req) if (aio_work) { INIT_WORK(&aio_work->work, ceph_aio_retry_work); aio_work->req = req; - queue_work(ceph_inode_to_client(inode)->inode_wq, + queue_work(ceph_inode_to_fs_client(inode)->inode_wq, &aio_work->work); return; } @@ -1386,7 +1418,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct 
ceph_client *cl = fsc->client; struct ceph_client_metric *metric = &fsc->mdsc->metric; struct ceph_vino vino; struct ceph_osd_request *req; @@ -1405,9 +1438,9 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP) return -EROFS; - dout("sync_direct_%s on file %p %lld~%u snapc %p seq %lld\n", - (write ? "write" : "read"), file, pos, (unsigned)count, - snapc, snapc ? snapc->seq : 0); + doutc(cl, "sync_direct_%s on file %p %lld~%u snapc %p seq %lld\n", + (write ? "write" : "read"), file, pos, (unsigned)count, + snapc, snapc ? snapc->seq : 0); if (write) { int ret2; @@ -1418,7 +1451,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, pos >> PAGE_SHIFT, (pos + count - 1) >> PAGE_SHIFT); if (ret2 < 0) - dout("invalidate_inode_pages2_range returned %d\n", ret2); + doutc(cl, "invalidate_inode_pages2_range returned %d\n", + ret2); flags = /* CEPH_OSD_FLAG_ORDERSNAP | */ CEPH_OSD_FLAG_WRITE; } else { @@ -1610,7 +1644,8 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_osd_client *osdc = &fsc->client->osdc; struct ceph_osd_request *req; struct page **pages; @@ -1625,8 +1660,8 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) return -EROFS; - dout("sync_write on file %p %lld~%u snapc %p seq %lld\n", - file, pos, (unsigned)count, snapc, snapc->seq); + doutc(cl, "on file %p %lld~%u snapc %p seq %lld\n", file, pos, + (unsigned)count, snapc, snapc->seq); ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + count - 1); @@ -1670,9 +1705,9 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, last = (pos + len) != (write_pos + write_len); rmw = first || last; - dout("sync_write ino %llx %lld~%llu adjusted %lld~%llu -- %srmw\n", - ci->i_vino.ino, pos, len, write_pos, write_len, - rmw ? "" : "no "); + doutc(cl, "ino %llx %lld~%llu adjusted %lld~%llu -- %srmw\n", + ci->i_vino.ino, pos, len, write_pos, write_len, + rmw ? "" : "no "); /* * The data is emplaced into the page as it would be if it were @@ -1881,7 +1916,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, left -= ret; } if (ret < 0) { - dout("sync_write write failed with %d\n", ret); + doutc(cl, "write failed with %d\n", ret); ceph_release_page_vector(pages, num_pages); break; } @@ -1891,7 +1926,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, write_pos, write_len, GFP_KERNEL); if (ret < 0) { - dout("encryption failed with %d\n", ret); + doutc(cl, "encryption failed with %d\n", ret); ceph_release_page_vector(pages, num_pages); break; } @@ -1910,7 +1945,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, break; } - dout("sync_write write op %lld~%llu\n", write_pos, write_len); + doutc(cl, "write op %lld~%llu\n", write_pos, write_len); osd_req_op_extent_osd_data_pages(req, rmw ? 
1 : 0, pages, write_len, offset_in_page(write_pos), false, true); @@ -1941,7 +1976,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, req->r_end_latency, len, ret); ceph_osdc_put_request(req); if (ret != 0) { - dout("sync_write osd write returned %d\n", ret); + doutc(cl, "osd write returned %d\n", ret); /* Version changed! Must re-do the rmw cycle */ if ((assert_ver && (ret == -ERANGE || ret == -EOVERFLOW)) || (!assert_ver && ret == -EEXIST)) { @@ -1971,13 +2006,13 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, pos >> PAGE_SHIFT, (pos + len - 1) >> PAGE_SHIFT); if (ret < 0) { - dout("invalidate_inode_pages2_range returned %d\n", - ret); + doutc(cl, "invalidate_inode_pages2_range returned %d\n", + ret); ret = 0; } pos += len; written += len; - dout("sync_write written %d\n", written); + doutc(cl, "written %d\n", written); if (pos > i_size_read(inode)) { check_caps = ceph_inode_set_size(inode, pos); if (check_caps) @@ -1991,7 +2026,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, ret = written; iocb->ki_pos = pos; } - dout("sync_write returning %d\n", ret); + doutc(cl, "returning %d\n", ret); return ret; } @@ -2010,13 +2045,14 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file_inode(filp); struct ceph_inode_info *ci = ceph_inode(inode); bool direct_lock = iocb->ki_flags & IOCB_DIRECT; + struct ceph_client *cl = ceph_inode_to_client(inode); ssize_t ret; int want = 0, got = 0; int retry_op = 0, read = 0; again: - dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", - inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, inode); + doutc(cl, "%llu~%u trying to get caps on %p %llx.%llx\n", + iocb->ki_pos, (unsigned)len, inode, ceph_vinop(inode)); if (ceph_inode_is_shutdown(inode)) return -ESTALE; @@ -2044,9 +2080,9 @@ again: (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) { - dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n", - inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, - ceph_cap_string(got)); + doutc(cl, "sync %p %llx.%llx %llu~%u got cap refs on %s\n", + inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, + ceph_cap_string(got)); if (!ceph_has_inline_data(ci)) { if (!retry_op && @@ -2064,16 +2100,16 @@ again: } } else { CEPH_DEFINE_RW_CONTEXT(rw_ctx, got); - dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", - inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, - ceph_cap_string(got)); + doutc(cl, "async %p %llx.%llx %llu~%u got cap refs on %s\n", + inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, + ceph_cap_string(got)); ceph_add_rw_context(fi, &rw_ctx); ret = generic_file_read_iter(iocb, to); ceph_del_rw_context(fi, &rw_ctx); } - dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", - inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); + doutc(cl, "%p %llx.%llx dropping cap refs on %s = %d\n", + inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); ceph_put_cap_refs(ci, got); if (direct_lock) @@ -2133,8 +2169,8 @@ again: /* hit EOF or hole? 
*/ if (retry_op == CHECK_EOF && iocb->ki_pos < i_size && ret < len) { - dout("sync_read hit hole, ppos %lld < size %lld" - ", reading more\n", iocb->ki_pos, i_size); + doutc(cl, "hit hole, ppos %lld < size %lld, reading more\n", + iocb->ki_pos, i_size); read += ret; len -= ret; @@ -2228,7 +2264,8 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) struct ceph_file_info *fi = file->private_data; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_osd_client *osdc = &fsc->client->osdc; struct ceph_cap_flush *prealloc_cf; ssize_t count, written = 0; @@ -2296,8 +2333,9 @@ retry_snap: if (err) goto out; - dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n", - inode, ceph_vinop(inode), pos, count, i_size_read(inode)); + doutc(cl, "%p %llx.%llx %llu~%zd getting caps. i_size %llu\n", + inode, ceph_vinop(inode), pos, count, + i_size_read(inode)); if (!(fi->flags & CEPH_F_SYNC) && !direct_lock) want |= CEPH_CAP_FILE_BUFFER; if (fi->fmode & CEPH_FILE_MODE_LAZY) @@ -2313,8 +2351,8 @@ retry_snap: inode_inc_iversion_raw(inode); - dout("aio_write %p %llx.%llx %llu~%zd got cap refs on %s\n", - inode, ceph_vinop(inode), pos, count, ceph_cap_string(got)); + doutc(cl, "%p %llx.%llx %llu~%zd got cap refs on %s\n", + inode, ceph_vinop(inode), pos, count, ceph_cap_string(got)); if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC) || @@ -2374,14 +2412,14 @@ retry_snap: ceph_check_caps(ci, CHECK_CAPS_FLUSH); } - dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", - inode, ceph_vinop(inode), pos, (unsigned)count, - ceph_cap_string(got)); + doutc(cl, "%p %llx.%llx %llu~%u dropping cap refs on %s\n", + inode, ceph_vinop(inode), pos, (unsigned)count, + ceph_cap_string(got)); ceph_put_cap_refs(ci, got); if (written == -EOLDSNAPC) { - dout("aio_write %p %llx.%llx %llu~%u" "got EOLDSNAPC, retrying\n", - inode, ceph_vinop(inode), pos, (unsigned)count); + doutc(cl, "%p %llx.%llx %llu~%u" "got EOLDSNAPC, retrying\n", + inode, ceph_vinop(inode), pos, (unsigned)count); goto retry_snap; } @@ -2462,7 +2500,7 @@ static int ceph_zero_partial_object(struct inode *inode, loff_t offset, loff_t *length) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_osd_request *req; int ret = 0; loff_t zero = 0; @@ -2553,14 +2591,15 @@ static long ceph_fallocate(struct file *file, int mode, struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_cap_flush *prealloc_cf; + struct ceph_client *cl = ceph_inode_to_client(inode); int want, got = 0; int dirty; int ret = 0; loff_t endoff = 0; loff_t size; - dout("%s %p %llx.%llx mode %x, offset %llu length %llu\n", __func__, - inode, ceph_vinop(inode), mode, offset, length); + doutc(cl, "%p %llx.%llx mode %x, offset %llu length %llu\n", + inode, ceph_vinop(inode), mode, offset, length); if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) return -EOPNOTSUPP; @@ -2689,6 +2728,7 @@ static void put_rd_wr_caps(struct ceph_inode_info *src_ci, int src_got, static int is_file_size_ok(struct inode *src_inode, struct inode *dst_inode, loff_t src_off, loff_t dst_off, size_t len) { + struct ceph_client 
*cl = ceph_inode_to_client(src_inode); loff_t size, endoff; size = i_size_read(src_inode); @@ -2699,8 +2739,8 @@ static int is_file_size_ok(struct inode *src_inode, struct inode *dst_inode, * inode. */ if (src_off + len > size) { - dout("Copy beyond EOF (%llu + %zu > %llu)\n", - src_off, len, size); + doutc(cl, "Copy beyond EOF (%llu + %zu > %llu)\n", src_off, + len, size); return -EOPNOTSUPP; } size = i_size_read(dst_inode); @@ -2776,6 +2816,7 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off u64 src_objnum, src_objoff, dst_objnum, dst_objoff; u32 src_objlen, dst_objlen; u32 object_size = src_ci->i_layout.object_size; + struct ceph_client *cl = fsc->client; int ret; src_oloc.pool = src_ci->i_layout.pool_id; @@ -2817,9 +2858,10 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off if (ret) { if (ret == -EOPNOTSUPP) { fsc->have_copy_from2 = false; - pr_notice("OSDs don't support copy-from2; disabling copy offload\n"); + pr_notice_client(cl, + "OSDs don't support copy-from2; disabling copy offload\n"); } - dout("ceph_osdc_copy_from returned %d\n", ret); + doutc(cl, "returned %d\n", ret); if (!bytes) bytes = ret; goto out; @@ -2845,7 +2887,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, struct ceph_inode_info *src_ci = ceph_inode(src_inode); struct ceph_inode_info *dst_ci = ceph_inode(dst_inode); struct ceph_cap_flush *prealloc_cf; - struct ceph_fs_client *src_fsc = ceph_inode_to_client(src_inode); + struct ceph_fs_client *src_fsc = ceph_inode_to_fs_client(src_inode); + struct ceph_client *cl = src_fsc->client; loff_t size; ssize_t ret = -EIO, bytes; u64 src_objnum, dst_objnum, src_objoff, dst_objoff; @@ -2853,7 +2896,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, int src_got = 0, dst_got = 0, err, dirty; if (src_inode->i_sb != dst_inode->i_sb) { - struct ceph_fs_client *dst_fsc = ceph_inode_to_client(dst_inode); + struct ceph_fs_client *dst_fsc = ceph_inode_to_fs_client(dst_inode); if (ceph_fsid_compare(&src_fsc->client->fsid, &dst_fsc->client->fsid)) { @@ -2888,7 +2931,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, (src_ci->i_layout.stripe_count != 1) || (dst_ci->i_layout.stripe_count != 1) || (src_ci->i_layout.object_size != dst_ci->i_layout.object_size)) { - dout("Invalid src/dst files layout\n"); + doutc(cl, "Invalid src/dst files layout\n"); return -EOPNOTSUPP; } @@ -2906,12 +2949,12 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, /* Start by sync'ing the source and destination files */ ret = file_write_and_wait_range(src_file, src_off, (src_off + len)); if (ret < 0) { - dout("failed to write src file (%zd)\n", ret); + doutc(cl, "failed to write src file (%zd)\n", ret); goto out; } ret = file_write_and_wait_range(dst_file, dst_off, (dst_off + len)); if (ret < 0) { - dout("failed to write dst file (%zd)\n", ret); + doutc(cl, "failed to write dst file (%zd)\n", ret); goto out; } @@ -2923,7 +2966,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, err = get_rd_wr_caps(src_file, &src_got, dst_file, (dst_off + len), &dst_got); if (err < 0) { - dout("get_rd_wr_caps returned %d\n", err); + doutc(cl, "get_rd_wr_caps returned %d\n", err); ret = -EOPNOTSUPP; goto out; } @@ -2938,7 +2981,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, dst_off >> PAGE_SHIFT, (dst_off + len) >> PAGE_SHIFT); if (ret < 0) { - dout("Failed to invalidate inode 
pages (%zd)\n", ret); + doutc(cl, "Failed to invalidate inode pages (%zd)\n", + ret); ret = 0; /* XXX */ } ceph_calc_file_object_mapping(&src_ci->i_layout, src_off, @@ -2959,7 +3003,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, * starting at the src_off */ if (src_objoff) { - dout("Initial partial copy of %u bytes\n", src_objlen); + doutc(cl, "Initial partial copy of %u bytes\n", src_objlen); /* * we need to temporarily drop all caps as we'll be calling @@ -2970,7 +3014,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, &dst_off, src_objlen, flags); /* Abort on short copies or on error */ if (ret < (long)src_objlen) { - dout("Failed partial copy (%zd)\n", ret); + doutc(cl, "Failed partial copy (%zd)\n", ret); goto out; } len -= ret; @@ -2992,7 +3036,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, ret = bytes; goto out_caps; } - dout("Copied %zu bytes out of %zu\n", bytes, len); + doutc(cl, "Copied %zu bytes out of %zu\n", bytes, len); len -= bytes; ret += bytes; @@ -3020,13 +3064,13 @@ out_caps: * there were errors in remote object copies (len >= object_size). */ if (len && (len < src_ci->i_layout.object_size)) { - dout("Final partial copy of %zu bytes\n", len); + doutc(cl, "Final partial copy of %zu bytes\n", len); bytes = do_splice_direct(src_file, &src_off, dst_file, &dst_off, len, flags); if (bytes > 0) ret += bytes; else - dout("Failed partial copy (%zd)\n", bytes); + doutc(cl, "Failed partial copy (%zd)\n", bytes); } out: diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 2e2a303b9e64..0679240f06db 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -129,6 +129,8 @@ void ceph_as_ctx_to_req(struct ceph_mds_request *req, struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino, struct inode *newino) { + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(sb); + struct ceph_client *cl = mdsc->fsc->client; struct inode *inode; if (ceph_vino_is_reserved(vino)) @@ -145,12 +147,13 @@ struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino, } if (!inode) { - dout("No inode found for %llx.%llx\n", vino.ino, vino.snap); + doutc(cl, "no inode found for %llx.%llx\n", vino.ino, vino.snap); return ERR_PTR(-ENOMEM); } - dout("get_inode on %llu=%llx.%llx got %p new %d\n", ceph_present_inode(inode), - ceph_vinop(inode), inode, !!(inode->i_state & I_NEW)); + doutc(cl, "on %llx=%llx.%llx got %p new %d\n", + ceph_present_inode(inode), ceph_vinop(inode), inode, + !!(inode->i_state & I_NEW)); return inode; } @@ -159,6 +162,7 @@ struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino, */ struct inode *ceph_get_snapdir(struct inode *parent) { + struct ceph_client *cl = ceph_inode_to_client(parent); struct ceph_vino vino = { .ino = ceph_ino(parent), .snap = CEPH_SNAPDIR, @@ -171,14 +175,14 @@ struct inode *ceph_get_snapdir(struct inode *parent) return inode; if (!S_ISDIR(parent->i_mode)) { - pr_warn_once("bad snapdir parent type (mode=0%o)\n", - parent->i_mode); + pr_warn_once_client(cl, "bad snapdir parent type (mode=0%o)\n", + parent->i_mode); goto err; } if (!(inode->i_state & I_NEW) && !S_ISDIR(inode->i_mode)) { - pr_warn_once("bad snapdir inode type (mode=0%o)\n", - inode->i_mode); + pr_warn_once_client(cl, "bad snapdir inode type (mode=0%o)\n", + inode->i_mode); goto err; } @@ -203,7 +207,7 @@ struct inode *ceph_get_snapdir(struct inode *parent) inode->i_flags |= S_ENCRYPTED; ci->fscrypt_auth_len = pci->fscrypt_auth_len; } else { - dout("Failed to 
alloc snapdir fscrypt_auth\n"); + doutc(cl, "Failed to alloc snapdir fscrypt_auth\n"); ret = -ENOMEM; goto err; } @@ -249,6 +253,8 @@ const struct inode_operations ceph_file_iops = { static struct ceph_inode_frag *__get_or_create_frag(struct ceph_inode_info *ci, u32 f) { + struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); struct rb_node **p; struct rb_node *parent = NULL; struct ceph_inode_frag *frag; @@ -279,8 +285,7 @@ static struct ceph_inode_frag *__get_or_create_frag(struct ceph_inode_info *ci, rb_link_node(&frag->node, parent, p); rb_insert_color(&frag->node, &ci->i_fragtree); - dout("get_or_create_frag added %llx.%llx frag %x\n", - ceph_vinop(&ci->netfs.inode), f); + doutc(cl, "added %p %llx.%llx frag %x\n", inode, ceph_vinop(inode), f); return frag; } @@ -313,6 +318,7 @@ struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, u32 f) static u32 __ceph_choose_frag(struct ceph_inode_info *ci, u32 v, struct ceph_inode_frag *pfrag, int *found) { + struct ceph_client *cl = ceph_inode_to_client(&ci->netfs.inode); u32 t = ceph_frag_make(0, 0); struct ceph_inode_frag *frag; unsigned nway, i; @@ -336,8 +342,8 @@ static u32 __ceph_choose_frag(struct ceph_inode_info *ci, u32 v, /* choose child */ nway = 1 << frag->split_by; - dout("choose_frag(%x) %x splits by %d (%d ways)\n", v, t, - frag->split_by, nway); + doutc(cl, "frag(%x) %x splits by %d (%d ways)\n", v, t, + frag->split_by, nway); for (i = 0; i < nway; i++) { n = ceph_frag_make_child(t, frag->split_by, i); if (ceph_frag_contains_value(n, v)) { @@ -347,7 +353,7 @@ static u32 __ceph_choose_frag(struct ceph_inode_info *ci, u32 v, } BUG_ON(i == nway); } - dout("choose_frag(%x) = %x\n", v, t); + doutc(cl, "frag(%x) = %x\n", v, t); return t; } @@ -371,6 +377,7 @@ static int ceph_fill_dirfrag(struct inode *inode, struct ceph_mds_reply_dirfrag *dirinfo) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_frag *frag; u32 id = le32_to_cpu(dirinfo->frag); int mds = le32_to_cpu(dirinfo->auth); @@ -395,14 +402,14 @@ static int ceph_fill_dirfrag(struct inode *inode, goto out; if (frag->split_by == 0) { /* tree leaf, remove */ - dout("fill_dirfrag removed %llx.%llx frag %x" - " (no ref)\n", ceph_vinop(inode), id); + doutc(cl, "removed %p %llx.%llx frag %x (no ref)\n", + inode, ceph_vinop(inode), id); rb_erase(&frag->node, &ci->i_fragtree); kfree(frag); } else { /* tree branch, keep and clear */ - dout("fill_dirfrag cleared %llx.%llx frag %x" - " referral\n", ceph_vinop(inode), id); + doutc(cl, "cleared %p %llx.%llx frag %x referral\n", + inode, ceph_vinop(inode), id); frag->mds = -1; frag->ndist = 0; } @@ -415,8 +422,9 @@ static int ceph_fill_dirfrag(struct inode *inode, if (IS_ERR(frag)) { /* this is not the end of the world; we can continue with bad/inaccurate delegation info */ - pr_err("fill_dirfrag ENOMEM on mds ref %llx.%llx fg %x\n", - ceph_vinop(inode), le32_to_cpu(dirinfo->frag)); + pr_err_client(cl, "ENOMEM on mds ref %p %llx.%llx fg %x\n", + inode, ceph_vinop(inode), + le32_to_cpu(dirinfo->frag)); err = -ENOMEM; goto out; } @@ -425,8 +433,8 @@ static int ceph_fill_dirfrag(struct inode *inode, frag->ndist = min_t(u32, ndist, CEPH_MAX_DIRFRAG_REP); for (i = 0; i < frag->ndist; i++) frag->dist[i] = le32_to_cpu(dirinfo->dist[i]); - dout("fill_dirfrag %llx.%llx frag %x ndist=%d\n", - ceph_vinop(inode), frag->frag, frag->ndist); + doutc(cl, "%p %llx.%llx frag %x ndist=%d\n", inode, + ceph_vinop(inode), frag->frag, 
frag->ndist); out: mutex_unlock(&ci->i_fragtree_mutex); @@ -454,6 +462,7 @@ static int ceph_fill_fragtree(struct inode *inode, struct ceph_frag_tree_head *fragtree, struct ceph_mds_reply_dirfrag *dirinfo) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_frag *frag, *prev_frag = NULL; struct rb_node *rb_node; @@ -489,15 +498,15 @@ static int ceph_fill_fragtree(struct inode *inode, frag_tree_split_cmp, NULL); } - dout("fill_fragtree %llx.%llx\n", ceph_vinop(inode)); + doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode)); rb_node = rb_first(&ci->i_fragtree); for (i = 0; i < nsplits; i++) { id = le32_to_cpu(fragtree->splits[i].frag); split_by = le32_to_cpu(fragtree->splits[i].by); if (split_by == 0 || ceph_frag_bits(id) + split_by > 24) { - pr_err("fill_fragtree %llx.%llx invalid split %d/%u, " - "frag %x split by %d\n", ceph_vinop(inode), - i, nsplits, id, split_by); + pr_err_client(cl, "%p %llx.%llx invalid split %d/%u, " + "frag %x split by %d\n", inode, + ceph_vinop(inode), i, nsplits, id, split_by); continue; } frag = NULL; @@ -529,7 +538,7 @@ static int ceph_fill_fragtree(struct inode *inode, if (frag->split_by == 0) ci->i_fragtree_nsplits++; frag->split_by = split_by; - dout(" frag %x split by %d\n", frag->frag, frag->split_by); + doutc(cl, " frag %x split by %d\n", frag->frag, frag->split_by); prev_frag = frag; } while (rb_node) { @@ -554,6 +563,7 @@ out_unlock: */ struct inode *ceph_alloc_inode(struct super_block *sb) { + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); struct ceph_inode_info *ci; int i; @@ -561,7 +571,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) if (!ci) return NULL; - dout("alloc_inode %p\n", &ci->netfs.inode); + doutc(fsc->client, "%p\n", &ci->netfs.inode); /* Set parameters for the netfs library */ netfs_inode_init(&ci->netfs, &ceph_netfs_ops); @@ -675,10 +685,11 @@ void ceph_evict_inode(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_frag *frag; struct rb_node *n; - dout("evict_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode)); + doutc(cl, "%p ino %llx.%llx\n", inode, ceph_vinop(inode)); percpu_counter_dec(&mdsc->metric.total_inodes); @@ -701,8 +712,8 @@ void ceph_evict_inode(struct inode *inode) */ if (ci->i_snap_realm) { if (ceph_snap(inode) == CEPH_NOSNAP) { - dout(" dropping residual ref to snap realm %p\n", - ci->i_snap_realm); + doutc(cl, " dropping residual ref to snap realm %p\n", + ci->i_snap_realm); ceph_change_snap_realm(inode, NULL); } else { ceph_put_snapid_map(mdsc, ci->i_snapid_map); @@ -743,15 +754,16 @@ static inline blkcnt_t calc_inode_blocks(u64 size) int ceph_fill_file_size(struct inode *inode, int issued, u32 truncate_seq, u64 truncate_size, u64 size) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); int queue_trunc = 0; loff_t isize = i_size_read(inode); if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) > 0 || (truncate_seq == ci->i_truncate_seq && size > isize)) { - dout("size %lld -> %llu\n", isize, size); + doutc(cl, "size %lld -> %llu\n", isize, size); if (size > 0 && S_ISDIR(inode->i_mode)) { - pr_err("fill_file_size non-zero size for directory\n"); + pr_err_client(cl, "non-zero size for directory\n"); size = 0; } i_size_write(inode, size); @@ -764,8 +776,8 @@ int ceph_fill_file_size(struct inode *inode, int issued, 
ceph_fscache_update(inode); ci->i_reported_size = size; if (truncate_seq != ci->i_truncate_seq) { - dout("%s truncate_seq %u -> %u\n", __func__, - ci->i_truncate_seq, truncate_seq); + doutc(cl, "truncate_seq %u -> %u\n", + ci->i_truncate_seq, truncate_seq); ci->i_truncate_seq = truncate_seq; /* the MDS should have revoked these caps */ @@ -794,14 +806,15 @@ int ceph_fill_file_size(struct inode *inode, int issued, * anyway. */ if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) >= 0) { - dout("%s truncate_size %lld -> %llu, encrypted %d\n", __func__, - ci->i_truncate_size, truncate_size, !!IS_ENCRYPTED(inode)); + doutc(cl, "truncate_size %lld -> %llu, encrypted %d\n", + ci->i_truncate_size, truncate_size, + !!IS_ENCRYPTED(inode)); ci->i_truncate_size = truncate_size; if (IS_ENCRYPTED(inode)) { - dout("%s truncate_pagecache_size %lld -> %llu\n", - __func__, ci->i_truncate_pagecache_size, size); + doutc(cl, "truncate_pagecache_size %lld -> %llu\n", + ci->i_truncate_pagecache_size, size); ci->i_truncate_pagecache_size = size; } else { ci->i_truncate_pagecache_size = truncate_size; @@ -814,6 +827,7 @@ void ceph_fill_file_time(struct inode *inode, int issued, u64 time_warp_seq, struct timespec64 *ctime, struct timespec64 *mtime, struct timespec64 *atime) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct timespec64 ictime = inode_get_ctime(inode); int warn = 0; @@ -825,7 +839,7 @@ void ceph_fill_file_time(struct inode *inode, int issued, CEPH_CAP_XATTR_EXCL)) { if (ci->i_version == 0 || timespec64_compare(ctime, &ictime) > 0) { - dout("ctime %lld.%09ld -> %lld.%09ld inc w/ cap\n", + doutc(cl, "ctime %lld.%09ld -> %lld.%09ld inc w/ cap\n", ictime.tv_sec, ictime.tv_nsec, ctime->tv_sec, ctime->tv_nsec); inode_set_ctime_to_ts(inode, *ctime); @@ -833,8 +847,7 @@ void ceph_fill_file_time(struct inode *inode, int issued, if (ci->i_version == 0 || ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { /* the MDS did a utimes() */ - dout("mtime %lld.%09ld -> %lld.%09ld " - "tw %d -> %d\n", + doutc(cl, "mtime %lld.%09ld -> %lld.%09ld tw %d -> %d\n", inode_get_mtime_sec(inode), inode_get_mtime_nsec(inode), mtime->tv_sec, mtime->tv_nsec, @@ -849,14 +862,14 @@ void ceph_fill_file_time(struct inode *inode, int issued, /* nobody did utimes(); take the max */ ts = inode_get_mtime(inode); if (timespec64_compare(mtime, &ts) > 0) { - dout("mtime %lld.%09ld -> %lld.%09ld inc\n", + doutc(cl, "mtime %lld.%09ld -> %lld.%09ld inc\n", ts.tv_sec, ts.tv_nsec, mtime->tv_sec, mtime->tv_nsec); inode_set_mtime_to_ts(inode, *mtime); } ts = inode_get_atime(inode); if (timespec64_compare(atime, &ts) > 0) { - dout("atime %lld.%09ld -> %lld.%09ld inc\n", + doutc(cl, "atime %lld.%09ld -> %lld.%09ld inc\n", ts.tv_sec, ts.tv_nsec, atime->tv_sec, atime->tv_nsec); inode_set_atime_to_ts(inode, *atime); @@ -878,13 +891,16 @@ void ceph_fill_file_time(struct inode *inode, int issued, } } if (warn) /* time_warp_seq shouldn't go backwards */ - dout("%p mds time_warp_seq %llu < %u\n", - inode, time_warp_seq, ci->i_time_warp_seq); + doutc(cl, "%p mds time_warp_seq %llu < %u\n", inode, + time_warp_seq, ci->i_time_warp_seq); } #if IS_ENABLED(CONFIG_FS_ENCRYPTION) -static int decode_encrypted_symlink(const char *encsym, int enclen, u8 **decsym) +static int decode_encrypted_symlink(struct ceph_mds_client *mdsc, + const char *encsym, + int enclen, u8 **decsym) { + struct ceph_client *cl = mdsc->fsc->client; int declen; u8 *sym; @@ -894,8 +910,9 @@ static int 
decode_encrypted_symlink(const char *encsym, int enclen, u8 **decsym) declen = ceph_base64_decode(encsym, enclen, sym); if (declen < 0) { - pr_err("%s: can't decode symlink (%d). Content: %.*s\n", - __func__, declen, enclen, encsym); + pr_err_client(cl, + "can't decode symlink (%d). Content: %.*s\n", + declen, enclen, encsym); kfree(sym); return -EIO; } @@ -904,7 +921,9 @@ static int decode_encrypted_symlink(const char *encsym, int enclen, u8 **decsym) return declen; } #else -static int decode_encrypted_symlink(const char *encsym, int symlen, u8 **decsym) +static int decode_encrypted_symlink(struct ceph_mds_client *mdsc, + const char *encsym, + int symlen, u8 **decsym) { return -EOPNOTSUPP; } @@ -921,6 +940,7 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, struct ceph_cap_reservation *caps_reservation) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_reply_inode *info = iinfo->in; struct ceph_inode_info *ci = ceph_inode(inode); int issued, new_issued, info_caps; @@ -939,25 +959,26 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, lockdep_assert_held(&mdsc->snap_rwsem); - dout("%s %p ino %llx.%llx v %llu had %llu\n", __func__, - inode, ceph_vinop(inode), le64_to_cpu(info->version), - ci->i_version); + doutc(cl, "%p ino %llx.%llx v %llu had %llu\n", inode, ceph_vinop(inode), + le64_to_cpu(info->version), ci->i_version); /* Once I_NEW is cleared, we can't change type or dev numbers */ if (inode->i_state & I_NEW) { inode->i_mode = mode; } else { if (inode_wrong_type(inode, mode)) { - pr_warn_once("inode type changed! (ino %llx.%llx is 0%o, mds says 0%o)\n", - ceph_vinop(inode), inode->i_mode, mode); + pr_warn_once_client(cl, + "inode type changed! (ino %llx.%llx is 0%o, mds says 0%o)\n", + ceph_vinop(inode), inode->i_mode, mode); return -ESTALE; } if ((S_ISCHR(mode) || S_ISBLK(mode)) && inode->i_rdev != rdev) { - pr_warn_once("dev inode rdev changed! (ino %llx.%llx is %u:%u, mds says %u:%u)\n", - ceph_vinop(inode), MAJOR(inode->i_rdev), - MINOR(inode->i_rdev), MAJOR(rdev), - MINOR(rdev)); + pr_warn_once_client(cl, + "dev inode rdev changed! 
(ino %llx.%llx is %u:%u, mds says %u:%u)\n", + ceph_vinop(inode), MAJOR(inode->i_rdev), + MINOR(inode->i_rdev), MAJOR(rdev), + MINOR(rdev)); return -ESTALE; } } @@ -979,8 +1000,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, if (iinfo->xattr_len > 4) { xattr_blob = ceph_buffer_new(iinfo->xattr_len, GFP_NOFS); if (!xattr_blob) - pr_err("%s ENOMEM xattr blob %d bytes\n", __func__, - iinfo->xattr_len); + pr_err_client(cl, "ENOMEM xattr blob %d bytes\n", + iinfo->xattr_len); } if (iinfo->pool_ns_len > 0) @@ -1034,9 +1055,10 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, inode->i_mode = mode; inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid)); inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid)); - dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode, - from_kuid(&init_user_ns, inode->i_uid), - from_kgid(&init_user_ns, inode->i_gid)); + doutc(cl, "%p %llx.%llx mode 0%o uid.gid %d.%d\n", inode, + ceph_vinop(inode), inode->i_mode, + from_kuid(&init_user_ns, inode->i_uid), + from_kgid(&init_user_ns, inode->i_gid)); ceph_decode_timespec64(&ci->i_btime, &iinfo->btime); ceph_decode_timespec64(&ci->i_snap_btime, &iinfo->snap_btime); } @@ -1092,7 +1114,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, if (size == round_up(fsize, CEPH_FSCRYPT_BLOCK_SIZE)) { size = fsize; } else { - pr_warn("fscrypt size mismatch: size=%llu fscrypt_file=%llu, discarding fscrypt_file size.\n", + pr_warn_client(cl, + "fscrypt size mismatch: size=%llu fscrypt_file=%llu, discarding fscrypt_file size.\n", info->size, size); } } @@ -1104,8 +1127,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, /* only update max_size on auth cap */ if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && ci->i_max_size != le64_to_cpu(info->max_size)) { - dout("max_size %lld -> %llu\n", ci->i_max_size, - le64_to_cpu(info->max_size)); + doutc(cl, "max_size %lld -> %llu\n", + ci->i_max_size, le64_to_cpu(info->max_size)); ci->i_max_size = le64_to_cpu(info->max_size); } } @@ -1168,15 +1191,17 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, if (IS_ENCRYPTED(inode)) { if (symlen != i_size_read(inode)) - pr_err("%s %llx.%llx BAD symlink size %lld\n", - __func__, ceph_vinop(inode), + pr_err_client(cl, + "%p %llx.%llx BAD symlink size %lld\n", + inode, ceph_vinop(inode), i_size_read(inode)); - err = decode_encrypted_symlink(iinfo->symlink, + err = decode_encrypted_symlink(mdsc, iinfo->symlink, symlen, (u8 **)&sym); if (err < 0) { - pr_err("%s decoding encrypted symlink failed: %d\n", - __func__, err); + pr_err_client(cl, + "decoding encrypted symlink failed: %d\n", + err); goto out; } symlen = err; @@ -1184,8 +1209,9 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, inode->i_blocks = calc_inode_blocks(symlen); } else { if (symlen != i_size_read(inode)) { - pr_err("%s %llx.%llx BAD symlink size %lld\n", - __func__, ceph_vinop(inode), + pr_err_client(cl, + "%p %llx.%llx BAD symlink size %lld\n", + inode, ceph_vinop(inode), i_size_read(inode)); i_size_write(inode, symlen); inode->i_blocks = calc_inode_blocks(symlen); @@ -1220,8 +1246,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, inode->i_fop = &ceph_dir_fops; break; default: - pr_err("%s %llx.%llx BAD mode 0%o\n", __func__, - ceph_vinop(inode), inode->i_mode); + pr_err_client(cl, "%p %llx.%llx BAD mode 0%o\n", inode, + ceph_vinop(inode), inode->i_mode); } /* were we issued a capability? 
*/ @@ -1242,7 +1268,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, (info_caps & CEPH_CAP_FILE_SHARED) && (issued & CEPH_CAP_FILE_EXCL) == 0 && !__ceph_dir_is_complete(ci)) { - dout(" marking %p complete (empty)\n", inode); + doutc(cl, " marking %p complete (empty)\n", + inode); i_size_write(inode, 0); __ceph_dir_set_complete(ci, atomic64_read(&ci->i_release_count), @@ -1251,8 +1278,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, wake = true; } else { - dout(" %p got snap_caps %s\n", inode, - ceph_cap_string(info_caps)); + doutc(cl, " %p got snap_caps %s\n", inode, + ceph_cap_string(info_caps)); ci->i_snap_caps |= info_caps; } } @@ -1268,8 +1295,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, if (cap_fmode >= 0) { if (!info_caps) - pr_warn("mds issued no caps on %llx.%llx\n", - ceph_vinop(inode)); + pr_warn_client(cl, "mds issued no caps on %llx.%llx\n", + ceph_vinop(inode)); __ceph_touch_fmode(ci, mdsc, cap_fmode); } @@ -1315,14 +1342,14 @@ static void __update_dentry_lease(struct inode *dir, struct dentry *dentry, unsigned long from_time, struct ceph_mds_session **old_lease_session) { + struct ceph_client *cl = ceph_inode_to_client(dir); struct ceph_dentry_info *di = ceph_dentry(dentry); unsigned mask = le16_to_cpu(lease->mask); long unsigned duration = le32_to_cpu(lease->duration_ms); long unsigned ttl = from_time + (duration * HZ) / 1000; long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000; - dout("update_dentry_lease %p duration %lu ms ttl %lu\n", - dentry, duration, ttl); + doutc(cl, "%p duration %lu ms ttl %lu\n", dentry, duration, ttl); /* only track leases on regular dentries */ if (ceph_snap(dir) != CEPH_NOSNAP) @@ -1423,6 +1450,7 @@ out_unlock: */ static int splice_dentry(struct dentry **pdn, struct inode *in) { + struct ceph_client *cl = ceph_inode_to_client(in); struct dentry *dn = *pdn; struct dentry *realdn; @@ -1454,23 +1482,21 @@ static int splice_dentry(struct dentry **pdn, struct inode *in) d_drop(dn); realdn = d_splice_alias(in, dn); if (IS_ERR(realdn)) { - pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n", - PTR_ERR(realdn), dn, in, ceph_vinop(in)); + pr_err_client(cl, "error %ld %p inode %p ino %llx.%llx\n", + PTR_ERR(realdn), dn, in, ceph_vinop(in)); return PTR_ERR(realdn); } if (realdn) { - dout("dn %p (%d) spliced with %p (%d) " - "inode %p ino %llx.%llx\n", - dn, d_count(dn), - realdn, d_count(realdn), - d_inode(realdn), ceph_vinop(d_inode(realdn))); + doutc(cl, "dn %p (%d) spliced with %p (%d) inode %p ino %llx.%llx\n", + dn, d_count(dn), realdn, d_count(realdn), + d_inode(realdn), ceph_vinop(d_inode(realdn))); dput(dn); *pdn = realdn; } else { BUG_ON(!ceph_dentry(dn)); - dout("dn %p attached to %p ino %llx.%llx\n", - dn, d_inode(dn), ceph_vinop(d_inode(dn))); + doutc(cl, "dn %p attached to %p ino %llx.%llx\n", dn, + d_inode(dn), ceph_vinop(d_inode(dn))); } return 0; } @@ -1492,14 +1518,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; struct inode *in = NULL; struct ceph_vino tvino, dvino; - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); + struct ceph_client *cl = fsc->client; int err = 0; - dout("fill_trace %p is_dentry %d is_target %d\n", req, - rinfo->head->is_dentry, rinfo->head->is_target); + doutc(cl, "%p is_dentry %d is_target %d\n", req, + rinfo->head->is_dentry, rinfo->head->is_target); if 
(!rinfo->head->is_target && !rinfo->head->is_dentry) { - dout("fill_trace reply is empty!\n"); + doutc(cl, "reply is empty!\n"); if (rinfo->head->result == 0 && req->r_parent) ceph_invalidate_dir_request(req); return 0; @@ -1556,13 +1583,13 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); retry_lookup: dn = d_lookup(parent, &dname); - dout("d_lookup on parent=%p name=%.*s got %p\n", - parent, dname.len, dname.name, dn); + doutc(cl, "d_lookup on parent=%p name=%.*s got %p\n", + parent, dname.len, dname.name, dn); if (!dn) { dn = d_alloc(parent, &dname); - dout("d_alloc %p '%.*s' = %p\n", parent, - dname.len, dname.name, dn); + doutc(cl, "d_alloc %p '%.*s' = %p\n", parent, + dname.len, dname.name, dn); if (!dn) { dput(parent); ceph_fname_free_buffer(dir, &oname); @@ -1578,8 +1605,8 @@ retry_lookup: } else if (d_really_is_positive(dn) && (ceph_ino(d_inode(dn)) != tvino.ino || ceph_snap(d_inode(dn)) != tvino.snap)) { - dout(" dn %p points to wrong inode %p\n", - dn, d_inode(dn)); + doutc(cl, " dn %p points to wrong inode %p\n", + dn, d_inode(dn)); ceph_dir_clear_ordered(dir); d_delete(dn); dput(dn); @@ -1604,8 +1631,8 @@ retry_lookup: rinfo->head->result == 0) ? req->r_fmode : -1, &req->r_caps_reservation); if (err < 0) { - pr_err("ceph_fill_inode badness %p %llx.%llx\n", - in, ceph_vinop(in)); + pr_err_client(cl, "badness %p %llx.%llx\n", in, + ceph_vinop(in)); req->r_target_inode = NULL; if (in->i_state & I_NEW) discard_new_inode(in); @@ -1655,36 +1682,32 @@ retry_lookup: have_lease = have_dir_cap || le32_to_cpu(rinfo->dlease->duration_ms); if (!have_lease) - dout("fill_trace no dentry lease or dir cap\n"); + doutc(cl, "no dentry lease or dir cap\n"); /* rename? */ if (req->r_old_dentry && req->r_op == CEPH_MDS_OP_RENAME) { struct inode *olddir = req->r_old_dentry_dir; BUG_ON(!olddir); - dout(" src %p '%pd' dst %p '%pd'\n", - req->r_old_dentry, - req->r_old_dentry, - dn, dn); - dout("fill_trace doing d_move %p -> %p\n", - req->r_old_dentry, dn); + doutc(cl, " src %p '%pd' dst %p '%pd'\n", + req->r_old_dentry, req->r_old_dentry, dn, dn); + doutc(cl, "doing d_move %p -> %p\n", req->r_old_dentry, dn); /* d_move screws up sibling dentries' offsets */ ceph_dir_clear_ordered(dir); ceph_dir_clear_ordered(olddir); d_move(req->r_old_dentry, dn); - dout(" src %p '%pd' dst %p '%pd'\n", - req->r_old_dentry, - req->r_old_dentry, - dn, dn); + doutc(cl, " src %p '%pd' dst %p '%pd'\n", + req->r_old_dentry, req->r_old_dentry, dn, dn); /* ensure target dentry is invalidated, despite rehashing bug in vfs_rename_dir */ ceph_invalidate_dentry_lease(dn); - dout("dn %p gets new offset %lld\n", req->r_old_dentry, - ceph_dentry(req->r_old_dentry)->offset); + doutc(cl, "dn %p gets new offset %lld\n", + req->r_old_dentry, + ceph_dentry(req->r_old_dentry)->offset); /* swap r_dentry and r_old_dentry in case that * splice_dentry() gets called later. This is safe @@ -1696,9 +1719,9 @@ retry_lookup: /* null dentry? 
*/ if (!rinfo->head->is_target) { - dout("fill_trace null dentry\n"); + doutc(cl, "null dentry\n"); if (d_really_is_positive(dn)) { - dout("d_delete %p\n", dn); + doutc(cl, "d_delete %p\n", dn); ceph_dir_clear_ordered(dir); d_delete(dn); } else if (have_lease) { @@ -1722,9 +1745,9 @@ retry_lookup: goto done; dn = req->r_dentry; /* may have spliced */ } else if (d_really_is_positive(dn) && d_inode(dn) != in) { - dout(" %p links to %p %llx.%llx, not %llx.%llx\n", - dn, d_inode(dn), ceph_vinop(d_inode(dn)), - ceph_vinop(in)); + doutc(cl, " %p links to %p %llx.%llx, not %llx.%llx\n", + dn, d_inode(dn), ceph_vinop(d_inode(dn)), + ceph_vinop(in)); d_invalidate(dn); have_lease = false; } @@ -1734,7 +1757,7 @@ retry_lookup: rinfo->dlease, session, req->r_request_started); } - dout(" final dn %p\n", dn); + doutc(cl, " final dn %p\n", dn); } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP || req->r_op == CEPH_MDS_OP_MKSNAP) && test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) && @@ -1745,7 +1768,8 @@ retry_lookup: BUG_ON(!dir); BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR); BUG_ON(!req->r_dentry); - dout(" linking snapped dir %p to dn %p\n", in, req->r_dentry); + doutc(cl, " linking snapped dir %p to dn %p\n", in, + req->r_dentry); ceph_dir_clear_ordered(dir); ihold(in); err = splice_dentry(&req->r_dentry, in); @@ -1767,7 +1791,7 @@ retry_lookup: &dvino, ptvino); } done: - dout("fill_trace done err=%d\n", err); + doutc(cl, "done err=%d\n", err); return err; } @@ -1778,6 +1802,7 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, struct ceph_mds_session *session) { struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; + struct ceph_client *cl = session->s_mdsc->fsc->client; int i, err = 0; for (i = 0; i < rinfo->dir_nr; i++) { @@ -1792,14 +1817,14 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, in = ceph_get_inode(req->r_dentry->d_sb, vino, NULL); if (IS_ERR(in)) { err = PTR_ERR(in); - dout("new_inode badness got %d\n", err); + doutc(cl, "badness got %d\n", err); continue; } rc = ceph_fill_inode(in, NULL, &rde->inode, NULL, session, -1, &req->r_caps_reservation); if (rc < 0) { - pr_err("ceph_fill_inode badness on %p got %d\n", - in, rc); + pr_err_client(cl, "inode badness on %p got %d\n", in, + rc); err = rc; if (in->i_state & I_NEW) { ihold(in); @@ -1828,6 +1853,7 @@ static int fill_readdir_cache(struct inode *dir, struct dentry *dn, struct ceph_readdir_cache_control *ctl, struct ceph_mds_request *req) { + struct ceph_client *cl = ceph_inode_to_client(dir); struct ceph_inode_info *ci = ceph_inode(dir); unsigned nsize = PAGE_SIZE / sizeof(struct dentry*); unsigned idx = ctl->index % nsize; @@ -1853,11 +1879,11 @@ static int fill_readdir_cache(struct inode *dir, struct dentry *dn, if (req->r_dir_release_cnt == atomic64_read(&ci->i_release_count) && req->r_dir_ordered_cnt == atomic64_read(&ci->i_ordered_count)) { - dout("readdir cache dn %p idx %d\n", dn, ctl->index); + doutc(cl, "dn %p idx %d\n", dn, ctl->index); ctl->dentries[idx] = dn; ctl->index++; } else { - dout("disable readdir cache\n"); + doutc(cl, "disable readdir cache\n"); ctl->index = -1; } return 0; @@ -1870,6 +1896,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, struct inode *inode = d_inode(parent); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; + struct ceph_client *cl = session->s_mdsc->fsc->client; struct qstr dname; struct dentry *dn; struct inode *in; @@ -1897,19 +1924,18 @@ int 
ceph_readdir_prepopulate(struct ceph_mds_request *req, if (rinfo->dir_dir && le32_to_cpu(rinfo->dir_dir->frag) != frag) { - dout("readdir_prepopulate got new frag %x -> %x\n", - frag, le32_to_cpu(rinfo->dir_dir->frag)); + doutc(cl, "got new frag %x -> %x\n", frag, + le32_to_cpu(rinfo->dir_dir->frag)); frag = le32_to_cpu(rinfo->dir_dir->frag); if (!rinfo->hash_order) req->r_readdir_offset = 2; } if (le32_to_cpu(rinfo->head->op) == CEPH_MDS_OP_LSSNAP) { - dout("readdir_prepopulate %d items under SNAPDIR dn %p\n", - rinfo->dir_nr, parent); + doutc(cl, "%d items under SNAPDIR dn %p\n", + rinfo->dir_nr, parent); } else { - dout("readdir_prepopulate %d items under dn %p\n", - rinfo->dir_nr, parent); + doutc(cl, "%d items under dn %p\n", rinfo->dir_nr, parent); if (rinfo->dir_dir) ceph_fill_dirfrag(d_inode(parent), rinfo->dir_dir); @@ -1953,15 +1979,15 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, retry_lookup: dn = d_lookup(parent, &dname); - dout("d_lookup on parent=%p name=%.*s got %p\n", - parent, dname.len, dname.name, dn); + doutc(cl, "d_lookup on parent=%p name=%.*s got %p\n", + parent, dname.len, dname.name, dn); if (!dn) { dn = d_alloc(parent, &dname); - dout("d_alloc %p '%.*s' = %p\n", parent, - dname.len, dname.name, dn); + doutc(cl, "d_alloc %p '%.*s' = %p\n", parent, + dname.len, dname.name, dn); if (!dn) { - dout("d_alloc badness\n"); + doutc(cl, "d_alloc badness\n"); err = -ENOMEM; goto out; } @@ -1974,8 +2000,8 @@ retry_lookup: (ceph_ino(d_inode(dn)) != tvino.ino || ceph_snap(d_inode(dn)) != tvino.snap)) { struct ceph_dentry_info *di = ceph_dentry(dn); - dout(" dn %p points to wrong inode %p\n", - dn, d_inode(dn)); + doutc(cl, " dn %p points to wrong inode %p\n", + dn, d_inode(dn)); spin_lock(&dn->d_lock); if (di->offset > 0 && @@ -1997,7 +2023,7 @@ retry_lookup: } else { in = ceph_get_inode(parent->d_sb, tvino, NULL); if (IS_ERR(in)) { - dout("new_inode badness\n"); + doutc(cl, "new_inode badness\n"); d_drop(dn); dput(dn); err = PTR_ERR(in); @@ -2008,7 +2034,8 @@ retry_lookup: ret = ceph_fill_inode(in, NULL, &rde->inode, NULL, session, -1, &req->r_caps_reservation); if (ret < 0) { - pr_err("ceph_fill_inode badness on %p\n", in); + pr_err_client(cl, "badness on %p %llx.%llx\n", in, + ceph_vinop(in)); if (d_really_is_negative(dn)) { if (in->i_state & I_NEW) { ihold(in); @@ -2025,8 +2052,8 @@ retry_lookup: if (d_really_is_negative(dn)) { if (ceph_security_xattr_deadlock(in)) { - dout(" skip splicing dn %p to inode %p" - " (security xattr deadlock)\n", dn, in); + doutc(cl, " skip splicing dn %p to inode %p" + " (security xattr deadlock)\n", dn, in); iput(in); skipped++; goto next_item; @@ -2058,17 +2085,18 @@ out: req->r_readdir_cache_idx = cache_ctl.index; } ceph_readdir_cache_release(&cache_ctl); - dout("readdir_prepopulate done\n"); + doutc(cl, "done\n"); return err; } bool ceph_inode_set_size(struct inode *inode, loff_t size) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); bool ret; spin_lock(&ci->i_ceph_lock); - dout("set_size %p %llu -> %llu\n", inode, i_size_read(inode), size); + doutc(cl, "set_size %p %llu -> %llu\n", inode, i_size_read(inode), size); i_size_write(inode, size); ceph_fscache_update(inode); inode->i_blocks = calc_inode_blocks(size); @@ -2082,22 +2110,25 @@ bool ceph_inode_set_size(struct inode *inode, loff_t size) void ceph_queue_inode_work(struct inode *inode, int work_bit) { - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = 
ceph_inode_to_fs_client(inode);
+	struct ceph_client *cl = fsc->client;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	set_bit(work_bit, &ci->i_work_mask);
 	ihold(inode);
 	if (queue_work(fsc->inode_wq, &ci->i_work)) {
-		dout("queue_inode_work %p, mask=%lx\n", inode, ci->i_work_mask);
+		doutc(cl, "%p %llx.%llx mask=%lx\n", inode,
+		      ceph_vinop(inode), ci->i_work_mask);
 	} else {
-		dout("queue_inode_work %p already queued, mask=%lx\n",
-		     inode, ci->i_work_mask);
+		doutc(cl, "%p %llx.%llx already queued, mask=%lx\n",
+		      inode, ceph_vinop(inode), ci->i_work_mask);
 		iput(inode);
 	}
 }
 
 static void ceph_do_invalidate_pages(struct inode *inode)
 {
+	struct ceph_client *cl = ceph_inode_to_client(inode);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	u32 orig_gen;
 	int check = 0;
@@ -2107,8 +2138,9 @@ static void ceph_do_invalidate_pages(struct inode *inode)
 	mutex_lock(&ci->i_truncate_mutex);
 
 	if (ceph_inode_is_shutdown(inode)) {
-		pr_warn_ratelimited("%s: inode %llx.%llx is shut down\n",
-				    __func__, ceph_vinop(inode));
+		pr_warn_ratelimited_client(cl,
+			"%p %llx.%llx is shut down\n", inode,
+			ceph_vinop(inode));
 		mapping_set_error(inode->i_mapping, -EIO);
 		truncate_pagecache(inode, 0);
 		mutex_unlock(&ci->i_truncate_mutex);
@@ -2116,8 +2148,8 @@ static void ceph_do_invalidate_pages(struct inode *inode)
 	spin_lock(&ci->i_ceph_lock);
-	dout("invalidate_pages %p gen %d revoking %d\n", inode,
-	     ci->i_rdcache_gen, ci->i_rdcache_revoking);
+	doutc(cl, "%p %llx.%llx gen %d revoking %d\n", inode,
+	      ceph_vinop(inode), ci->i_rdcache_gen, ci->i_rdcache_revoking);
 	if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
 		if (__ceph_caps_revoking_other(ci, NULL, CEPH_CAP_FILE_CACHE))
 			check = 1;
@@ -2129,21 +2161,21 @@ static void ceph_do_invalidate_pages(struct inode *inode)
 	spin_unlock(&ci->i_ceph_lock);
 
 	if (invalidate_inode_pages2(inode->i_mapping) < 0) {
-		pr_err("invalidate_inode_pages2 %llx.%llx failed\n",
-		       ceph_vinop(inode));
+		pr_err_client(cl, "invalidate_inode_pages2 %llx.%llx failed\n",
+			      ceph_vinop(inode));
 	}
 
 	spin_lock(&ci->i_ceph_lock);
 	if (orig_gen == ci->i_rdcache_gen &&
 	    orig_gen == ci->i_rdcache_revoking) {
-		dout("invalidate_pages %p gen %d successful\n", inode,
-		     ci->i_rdcache_gen);
+		doutc(cl, "%p %llx.%llx gen %d successful\n", inode,
+		      ceph_vinop(inode), ci->i_rdcache_gen);
 		ci->i_rdcache_revoking--;
 		check = 1;
 	} else {
-		dout("invalidate_pages %p gen %d raced, now %d revoking %d\n",
-		     inode, orig_gen, ci->i_rdcache_gen,
-		     ci->i_rdcache_revoking);
+		doutc(cl, "%p %llx.%llx gen %d raced, now %d revoking %d\n",
+		      inode, ceph_vinop(inode), orig_gen, ci->i_rdcache_gen,
+		      ci->i_rdcache_revoking);
 		if (__ceph_caps_revoking_other(ci, NULL, CEPH_CAP_FILE_CACHE))
 			check = 1;
 	}
@@ -2160,6 +2192,7 @@ out:
  */
 void __ceph_do_pending_vmtruncate(struct inode *inode)
 {
+	struct ceph_client *cl = ceph_inode_to_client(inode);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	u64 to;
 	int wrbuffer_refs, finish = 0;
@@ -2168,7 +2201,8 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
 retry:
 	spin_lock(&ci->i_ceph_lock);
 	if (ci->i_truncate_pending == 0) {
-		dout("%s %p none pending\n", __func__, inode);
+		doutc(cl, "%p %llx.%llx none pending\n", inode,
+		      ceph_vinop(inode));
 		spin_unlock(&ci->i_ceph_lock);
 		mutex_unlock(&ci->i_truncate_mutex);
 		return;
@@ -2180,7 +2214,8 @@ retry:
 	 */
 	if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) {
 		spin_unlock(&ci->i_ceph_lock);
-		dout("%s %p flushing snaps first\n", __func__, inode);
+		doutc(cl, "%p %llx.%llx flushing snaps first\n", inode,
+		      ceph_vinop(inode));
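Every hunk in this series follows the same conversion: dout() calls that identified an inode only by a raw pointer gain a struct ceph_client * argument (doutc()), and bare pr_err()/pr_warn() calls become their *_client() counterparts, so that on a host with several CephFS mounts each log line says which client emitted it. As a rough sketch of the shape such a client-aware macro takes — the prefix format and field accesses below are assumptions for illustration, not the series' actual super.h definition:

	/*
	 * Illustrative sketch only, not the kernel's doutc(): prefix each
	 * debug line with the client's fsid and global_id so messages from
	 * concurrent mounts can be told apart. Assumes the conventional
	 * struct ceph_client layout (fsid, monc.auth->global_id).
	 */
	#define doutc_sketch(client, fmt, ...)				\
		pr_debug("[%pU %llu] " fmt, &(client)->fsid,		\
			 (client)->monc.auth->global_id, ##__VA_ARGS__)

	/* A call site then mirrors the converted lines in these hunks: */
	doutc_sketch(cl, "%p %llx.%llx none pending\n", inode,
		     ceph_vinop(inode));

The doubled inode identification ("%p %llx.%llx", the in-core pointer plus the vino) is also what lets the converted strings drop the "sync_write"/"setattr"-style prefixes the old dout() messages carried: dout() is built on pr_debug(), whose dynamic-debug metadata can already report the originating function.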
filemap_write_and_wait_range(&inode->i_data, 0, inode->i_sb->s_maxbytes); goto retry; @@ -2191,8 +2226,8 @@ retry: to = ci->i_truncate_pagecache_size; wrbuffer_refs = ci->i_wrbuffer_ref; - dout("%s %p (%d) to %lld\n", __func__, inode, - ci->i_truncate_pending, to); + doutc(cl, "%p %llx.%llx (%d) to %lld\n", inode, ceph_vinop(inode), + ci->i_truncate_pending, to); spin_unlock(&ci->i_ceph_lock); ceph_fscache_resize(inode, to); @@ -2220,9 +2255,10 @@ static void ceph_inode_work(struct work_struct *work) struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, i_work); struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); if (test_and_clear_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask)) { - dout("writeback %p\n", inode); + doutc(cl, "writeback %p %llx.%llx\n", inode, ceph_vinop(inode)); filemap_fdatawrite(&inode->i_data); } if (test_and_clear_bit(CEPH_I_WORK_INVALIDATE_PAGES, &ci->i_work_mask)) @@ -2294,6 +2330,7 @@ static int fill_fscrypt_truncate(struct inode *inode, struct ceph_mds_request *req, struct iattr *attr) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); int boff = attr->ia_size % CEPH_FSCRYPT_BLOCK_SIZE; loff_t pos, orig_pos = round_down(attr->ia_size, @@ -2316,9 +2353,9 @@ static int fill_fscrypt_truncate(struct inode *inode, issued = __ceph_caps_issued(ci, NULL); - dout("%s size %lld -> %lld got cap refs on %s, issued %s\n", __func__, - i_size, attr->ia_size, ceph_cap_string(got), - ceph_cap_string(issued)); + doutc(cl, "size %lld -> %lld got cap refs on %s, issued %s\n", + i_size, attr->ia_size, ceph_cap_string(got), + ceph_cap_string(issued)); /* Try to writeback the dirty pagecaches */ if (issued & (CEPH_CAP_FILE_BUFFER)) { @@ -2373,8 +2410,7 @@ static int fill_fscrypt_truncate(struct inode *inode, * If the Rados object doesn't exist, it will be set to 0. 
*/ if (!objver) { - dout("%s hit hole, ppos %lld < size %lld\n", __func__, - pos, i_size); + doutc(cl, "hit hole, ppos %lld < size %lld\n", pos, i_size); header.data_len = cpu_to_le32(8 + 8 + 4); header.file_offset = 0; @@ -2383,8 +2419,8 @@ static int fill_fscrypt_truncate(struct inode *inode, header.data_len = cpu_to_le32(8 + 8 + 4 + CEPH_FSCRYPT_BLOCK_SIZE); header.file_offset = cpu_to_le64(orig_pos); - dout("%s encrypt block boff/bsize %d/%lu\n", __func__, - boff, CEPH_FSCRYPT_BLOCK_SIZE); + doutc(cl, "encrypt block boff/bsize %d/%lu\n", boff, + CEPH_FSCRYPT_BLOCK_SIZE); /* truncate and zero out the extra contents for the last block */ memset(iov.iov_base + boff, 0, PAGE_SIZE - boff); @@ -2412,8 +2448,8 @@ static int fill_fscrypt_truncate(struct inode *inode, } req->r_pagelist = pagelist; out: - dout("%s %p size dropping cap refs on %s\n", __func__, - inode, ceph_cap_string(got)); + doutc(cl, "%p %llx.%llx size dropping cap refs on %s\n", inode, + ceph_vinop(inode), ceph_cap_string(got)); ceph_put_cap_refs(ci, got); if (iov.iov_base) kunmap_local(iov.iov_base); @@ -2424,13 +2460,14 @@ out: return ret; } -int __ceph_setattr(struct inode *inode, struct iattr *attr, - struct ceph_iattr *cia) +int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode, + struct iattr *attr, struct ceph_iattr *cia) { struct ceph_inode_info *ci = ceph_inode(inode); unsigned int ia_valid = attr->ia_valid; struct ceph_mds_request *req; - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_cap_flush *prealloc_cf; loff_t isize = i_size_read(inode); int issued; @@ -2469,7 +2506,8 @@ retry: } } - dout("setattr %p issued %s\n", inode, ceph_cap_string(issued)); + doutc(cl, "%p %llx.%llx issued %s\n", inode, ceph_vinop(inode), + ceph_cap_string(issued)); #if IS_ENABLED(CONFIG_FS_ENCRYPTION) if (cia && cia->fscrypt_auth) { u32 len = ceph_fscrypt_auth_len(cia->fscrypt_auth); @@ -2480,8 +2518,8 @@ retry: goto out; } - dout("setattr %llx:%llx fscrypt_auth len %u to %u)\n", - ceph_vinop(inode), ci->fscrypt_auth_len, len); + doutc(cl, "%p %llx.%llx fscrypt_auth len %u to %u)\n", inode, + ceph_vinop(inode), ci->fscrypt_auth_len, len); /* It should never be re-set once set */ WARN_ON_ONCE(ci->fscrypt_auth); @@ -2509,38 +2547,44 @@ retry: #endif /* CONFIG_FS_ENCRYPTION */ if (ia_valid & ATTR_UID) { - dout("setattr %p uid %d -> %d\n", inode, - from_kuid(&init_user_ns, inode->i_uid), - from_kuid(&init_user_ns, attr->ia_uid)); + kuid_t fsuid = from_vfsuid(idmap, i_user_ns(inode), attr->ia_vfsuid); + + doutc(cl, "%p %llx.%llx uid %d -> %d\n", inode, + ceph_vinop(inode), + from_kuid(&init_user_ns, inode->i_uid), + from_kuid(&init_user_ns, attr->ia_uid)); if (issued & CEPH_CAP_AUTH_EXCL) { - inode->i_uid = attr->ia_uid; + inode->i_uid = fsuid; dirtied |= CEPH_CAP_AUTH_EXCL; } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || - !uid_eq(attr->ia_uid, inode->i_uid)) { + !uid_eq(fsuid, inode->i_uid)) { req->r_args.setattr.uid = cpu_to_le32( - from_kuid(&init_user_ns, attr->ia_uid)); + from_kuid(&init_user_ns, fsuid)); mask |= CEPH_SETATTR_UID; release |= CEPH_CAP_AUTH_SHARED; } } if (ia_valid & ATTR_GID) { - dout("setattr %p gid %d -> %d\n", inode, - from_kgid(&init_user_ns, inode->i_gid), - from_kgid(&init_user_ns, attr->ia_gid)); + kgid_t fsgid = from_vfsgid(idmap, i_user_ns(inode), attr->ia_vfsgid); + + doutc(cl, "%p %llx.%llx gid %d -> %d\n", inode, + ceph_vinop(inode), + 
from_kgid(&init_user_ns, inode->i_gid),
+		      from_kgid(&init_user_ns, attr->ia_gid));
 		if (issued & CEPH_CAP_AUTH_EXCL) {
-			inode->i_gid = attr->ia_gid;
+			inode->i_gid = fsgid;
 			dirtied |= CEPH_CAP_AUTH_EXCL;
 		} else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
-			   !gid_eq(attr->ia_gid, inode->i_gid)) {
+			   !gid_eq(fsgid, inode->i_gid)) {
 			req->r_args.setattr.gid = cpu_to_le32(
-				from_kgid(&init_user_ns, attr->ia_gid));
+				from_kgid(&init_user_ns, fsgid));
 			mask |= CEPH_SETATTR_GID;
 			release |= CEPH_CAP_AUTH_SHARED;
 		}
 	}
 	if (ia_valid & ATTR_MODE) {
-		dout("setattr %p mode 0%o -> 0%o\n", inode, inode->i_mode,
-		     attr->ia_mode);
+		doutc(cl, "%p %llx.%llx mode 0%o -> 0%o\n", inode,
+		      ceph_vinop(inode), inode->i_mode, attr->ia_mode);
 		if (issued & CEPH_CAP_AUTH_EXCL) {
 			inode->i_mode = attr->ia_mode;
 			dirtied |= CEPH_CAP_AUTH_EXCL;
@@ -2556,9 +2600,10 @@ retry:
 	if (ia_valid & ATTR_ATIME) {
 		struct timespec64 atime = inode_get_atime(inode);
 
-		dout("setattr %p atime %lld.%ld -> %lld.%ld\n", inode,
-		     atime.tv_sec, atime.tv_nsec,
-		     attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec);
+		doutc(cl, "%p %llx.%llx atime %lld.%09ld -> %lld.%09ld\n",
+		      inode, ceph_vinop(inode),
+		      atime.tv_sec, atime.tv_nsec,
+		      attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec);
 		if (issued & CEPH_CAP_FILE_EXCL) {
 			ci->i_time_warp_seq++;
 			inode_set_atime_to_ts(inode, attr->ia_atime);
@@ -2578,7 +2623,8 @@ retry:
 		}
 	}
 	if (ia_valid & ATTR_SIZE) {
-		dout("setattr %p size %lld -> %lld\n", inode, isize, attr->ia_size);
+		doutc(cl, "%p %llx.%llx size %lld -> %lld\n", inode,
+		      ceph_vinop(inode), isize, attr->ia_size);
 		/*
 		 * Only when the new size is smaller and not aligned to
 		 * CEPH_FSCRYPT_BLOCK_SIZE will the RMW be needed.
@@ -2631,9 +2677,10 @@ retry:
 	if (ia_valid & ATTR_MTIME) {
 		struct timespec64 mtime = inode_get_mtime(inode);
 
-		dout("setattr %p mtime %lld.%ld -> %lld.%ld\n", inode,
-		     mtime.tv_sec, mtime.tv_nsec,
-		     attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec);
+		doutc(cl, "%p %llx.%llx mtime %lld.%09ld -> %lld.%09ld\n",
+		      inode, ceph_vinop(inode),
+		      mtime.tv_sec, mtime.tv_nsec,
+		      attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec);
 		if (issued & CEPH_CAP_FILE_EXCL) {
 			ci->i_time_warp_seq++;
 			inode_set_mtime_to_ts(inode, attr->ia_mtime);
@@ -2656,11 +2703,12 @@ retry:
 	if (ia_valid & ATTR_CTIME) {
 		bool only = (ia_valid & (ATTR_SIZE|ATTR_MTIME|ATTR_ATIME|
 					 ATTR_MODE|ATTR_UID|ATTR_GID)) == 0;
-		dout("setattr %p ctime %lld.%ld -> %lld.%ld (%s)\n", inode,
-		     inode_get_ctime_sec(inode),
-		     inode_get_ctime_nsec(inode),
-		     attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
-		     only ? "ctime only" : "ignored");
+		doutc(cl, "%p %llx.%llx ctime %lld.%09ld -> %lld.%09ld (%s)\n",
+		      inode, ceph_vinop(inode),
+		      inode_get_ctime_sec(inode),
+		      inode_get_ctime_nsec(inode),
+		      attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
+		      only ? "ctime only" : "ignored");
 		if (only) {
 			/*
 			 * if kernel wants to dirty ctime but nothing else,
@@ -2678,7 +2726,8 @@ retry:
 		}
 	}
 	if (ia_valid & ATTR_FILE)
-		dout("setattr %p ATTR_FILE ... hrm!\n", inode);
+		doutc(cl, "%p %llx.%llx ATTR_FILE ...
hrm!\n", inode, + ceph_vinop(inode)); if (dirtied) { inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied, @@ -2719,16 +2768,17 @@ retry: */ err = ceph_mdsc_do_request(mdsc, NULL, req); if (err == -EAGAIN && truncate_retry--) { - dout("setattr %p result=%d (%s locally, %d remote), retry it!\n", - inode, err, ceph_cap_string(dirtied), mask); + doutc(cl, "%p %llx.%llx result=%d (%s locally, %d remote), retry it!\n", + inode, ceph_vinop(inode), err, + ceph_cap_string(dirtied), mask); ceph_mdsc_put_request(req); ceph_free_cap_flush(prealloc_cf); goto retry; } } out: - dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err, - ceph_cap_string(dirtied), mask); + doutc(cl, "%p %llx.%llx result=%d (%s locally, %d remote)\n", inode, + ceph_vinop(inode), err, ceph_cap_string(dirtied), mask); ceph_mdsc_put_request(req); ceph_free_cap_flush(prealloc_cf); @@ -2746,7 +2796,7 @@ int ceph_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); int err; if (ceph_snap(inode) != CEPH_NOSNAP) @@ -2759,7 +2809,7 @@ int ceph_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (err) return err; - err = setattr_prepare(&nop_mnt_idmap, dentry, attr); + err = setattr_prepare(idmap, dentry, attr); if (err != 0) return err; @@ -2771,10 +2821,10 @@ int ceph_setattr(struct mnt_idmap *idmap, struct dentry *dentry, ceph_quota_is_max_bytes_exceeded(inode, attr->ia_size)) return -EDQUOT; - err = __ceph_setattr(inode, attr, NULL); + err = __ceph_setattr(idmap, inode, attr, NULL); if (err >= 0 && (attr->ia_valid & ATTR_MODE)) - err = posix_acl_chmod(&nop_mnt_idmap, dentry, attr->ia_mode); + err = posix_acl_chmod(idmap, dentry, attr->ia_mode); return err; } @@ -2816,19 +2866,21 @@ int ceph_try_to_choose_auth_mds(struct inode *inode, int mask) int __ceph_do_getattr(struct inode *inode, struct page *locked_page, int mask, bool force) { - struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(inode->i_sb); + struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; int mode; int err; if (ceph_snap(inode) == CEPH_SNAPDIR) { - dout("do_getattr inode %p SNAPDIR\n", inode); + doutc(cl, "inode %p %llx.%llx SNAPDIR\n", inode, + ceph_vinop(inode)); return 0; } - dout("do_getattr inode %p mask %s mode 0%o\n", - inode, ceph_cap_string(mask), inode->i_mode); + doutc(cl, "inode %p %llx.%llx mask %s mode 0%o\n", inode, + ceph_vinop(inode), ceph_cap_string(mask), inode->i_mode); if (!force && ceph_caps_issued_mask_metric(ceph_inode(inode), mask, 1)) return 0; @@ -2855,14 +2907,15 @@ int __ceph_do_getattr(struct inode *inode, struct page *locked_page, } } ceph_mdsc_put_request(req); - dout("do_getattr result=%d\n", err); + doutc(cl, "result=%d\n", err); return err; } int ceph_do_getvxattr(struct inode *inode, const char *name, void *value, size_t size) { - struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(inode->i_sb); + struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; int mode = USE_AUTH_MDS; @@ -2892,7 +2945,7 @@ int ceph_do_getvxattr(struct inode *inode, const char *name, void *value, xattr_value = req->r_reply_info.xattr_info.xattr_value; xattr_value_len = req->r_reply_info.xattr_info.xattr_value_len; - 
dout("do_getvxattr xattr_value_len:%zu, size:%zu\n", xattr_value_len, size); + doutc(cl, "xattr_value_len:%zu, size:%zu\n", xattr_value_len, size); err = (int)xattr_value_len; if (size == 0) @@ -2907,7 +2960,7 @@ int ceph_do_getvxattr(struct inode *inode, const char *name, void *value, put: ceph_mdsc_put_request(req); out: - dout("do_getvxattr result=%d\n", err); + doutc(cl, "result=%d\n", err); return err; } @@ -2927,7 +2980,7 @@ int ceph_permission(struct mnt_idmap *idmap, struct inode *inode, err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED, false); if (!err) - err = generic_permission(&nop_mnt_idmap, inode, mask); + err = generic_permission(idmap, inode, mask); return err; } @@ -2984,7 +3037,7 @@ int ceph_getattr(struct mnt_idmap *idmap, const struct path *path, return err; } - generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); + generic_fillattr(idmap, request_mask, inode, stat); stat->ino = ceph_present_inode(inode); /* @@ -3007,7 +3060,7 @@ int ceph_getattr(struct mnt_idmap *idmap, const struct path *path, stat->dev = ci->i_snapid_map ? ci->i_snapid_map->dev : 0; if (S_ISDIR(inode->i_mode)) { - if (ceph_test_mount_opt(ceph_sb_to_client(sb), RBYTES)) { + if (ceph_test_mount_opt(ceph_sb_to_fs_client(sb), RBYTES)) { stat->size = ci->i_rbytes; } else if (ceph_snap(inode) == CEPH_SNAPDIR) { struct ceph_inode_info *pci; diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 91a84917d203..e861de3c79b9 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -65,7 +65,7 @@ static long __validate_layout(struct ceph_mds_client *mdsc, static long ceph_ioctl_set_layout(struct file *file, void __user *arg) { struct inode *inode = file_inode(file); - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; struct ceph_mds_request *req; struct ceph_ioctl_layout l; struct ceph_inode_info *ci = ceph_inode(file_inode(file)); @@ -140,7 +140,7 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg) struct ceph_mds_request *req; struct ceph_ioctl_layout l; int err; - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; /* copy and validate */ if (copy_from_user(&l, arg, sizeof(l))) @@ -183,7 +183,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_osd_client *osdc = - &ceph_sb_to_client(inode->i_sb)->client->osdc; + &ceph_sb_to_fs_client(inode->i_sb)->client->osdc; struct ceph_object_locator oloc; CEPH_DEFINE_OID_ONSTACK(oid); u32 xlen; @@ -244,7 +244,8 @@ static long ceph_ioctl_lazyio(struct file *file) struct ceph_file_info *fi = file->private_data; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) { spin_lock(&ci->i_ceph_lock); @@ -252,11 +253,13 @@ static long ceph_ioctl_lazyio(struct file *file) ci->i_nr_by_mode[ffs(CEPH_FILE_MODE_LAZY)]++; __ceph_touch_fmode(ci, mdsc, fi->fmode); spin_unlock(&ci->i_ceph_lock); - dout("ioctl_layzio: file %p marked lazy\n", file); + doutc(cl, "file %p %p %llx.%llx marked lazy\n", file, inode, + ceph_vinop(inode)); ceph_check_caps(ci, 0); } else { - dout("ioctl_layzio: file %p 
already lazy\n", file); + doutc(cl, "file %p %p %llx.%llx already lazy\n", file, inode, + ceph_vinop(inode)); } return 0; } @@ -355,10 +358,12 @@ static const char *ceph_ioctl_cmd_name(const unsigned int cmd) long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { + struct inode *inode = file_inode(file); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); int ret; - dout("ioctl file %p cmd %s arg %lu\n", file, - ceph_ioctl_cmd_name(cmd), arg); + doutc(fsc->client, "file %p %p %llx.%llx cmd %s arg %lu\n", file, + inode, ceph_vinop(inode), ceph_ioctl_cmd_name(cmd), arg); switch (cmd) { case CEPH_IOC_GET_LAYOUT: return ceph_ioctl_get_layout(file, (void __user *)arg); diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index cb51c7e9c8e2..e07ad29ff8b9 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -77,6 +77,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, int cmd, u8 wait, struct file_lock *fl) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; int err; u64 length = 0; @@ -111,10 +112,10 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, owner = secure_addr(fl->fl_owner); - dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, " - "start: %llu, length: %llu, wait: %d, type: %d\n", (int)lock_type, - (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length, - wait, fl->fl_type); + doutc(cl, "rule: %d, op: %d, owner: %llx, pid: %llu, " + "start: %llu, length: %llu, wait: %d, type: %d\n", + (int)lock_type, (int)operation, owner, (u64)fl->fl_pid, + fl->fl_start, length, wait, fl->fl_type); req->r_args.filelock_change.rule = lock_type; req->r_args.filelock_change.type = cmd; @@ -147,16 +148,17 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, } ceph_mdsc_put_request(req); - dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " - "length: %llu, wait: %d, type: %d, err code %d\n", (int)lock_type, - (int)operation, (u64)fl->fl_pid, fl->fl_start, - length, wait, fl->fl_type, err); + doutc(cl, "rule: %d, op: %d, pid: %llu, start: %llu, " + "length: %llu, wait: %d, type: %d, err code %d\n", + (int)lock_type, (int)operation, (u64)fl->fl_pid, + fl->fl_start, length, wait, fl->fl_type, err); return err; } static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *intr_req; struct inode *inode = req->r_inode; int err, lock_type; @@ -174,8 +176,7 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, if (!err) return 0; - dout("ceph_lock_wait_for_completion: request %llu was interrupted\n", - req->r_tid); + doutc(cl, "request %llu was interrupted\n", req->r_tid); mutex_lock(&mdsc->mutex); if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) { @@ -246,6 +247,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) { struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); int err = 0; u16 op = CEPH_MDS_OP_SETFILELOCK; u8 wait = 0; @@ -257,7 +259,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) if (ceph_inode_is_shutdown(inode)) return -ESTALE; - dout("ceph_lock, fl_owner: %p\n", fl->fl_owner); + doutc(cl, "fl_owner: %p\n", fl->fl_owner); /* set wait bit as appropriate, then make command as Ceph expects it*/ if 
(IS_GETLK(cmd)) @@ -292,7 +294,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl); if (!err) { if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK != fl->fl_type) { - dout("mds locked, locking locally\n"); + doutc(cl, "locking locally\n"); err = posix_lock_file(file, fl, NULL); if (err) { /* undo! This should only happen if @@ -300,8 +302,8 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) * deadlock. */ ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, CEPH_LOCK_UNLOCK, 0, fl); - dout("got %d on posix_lock_file, undid lock\n", - err); + doutc(cl, "got %d on posix_lock_file, undid lock\n", + err); } } } @@ -312,6 +314,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) { struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); int err = 0; u8 wait = 0; u8 lock_cmd; @@ -322,7 +325,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) if (ceph_inode_is_shutdown(inode)) return -ESTALE; - dout("ceph_flock, fl_file: %p\n", fl->fl_file); + doutc(cl, "fl_file: %p\n", fl->fl_file); spin_lock(&ci->i_ceph_lock); if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { @@ -359,7 +362,8 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, inode, CEPH_LOCK_UNLOCK, 0, fl); - dout("got %d on locks_lock_file_wait, undid lock\n", err); + doutc(cl, "got %d on locks_lock_file_wait, undid lock\n", + err); } } return err; @@ -371,6 +375,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) */ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct file_lock *lock; struct file_lock_context *ctx; @@ -386,17 +391,20 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) ++(*flock_count); spin_unlock(&ctx->flc_lock); } - dout("counted %d flock locks and %d fcntl locks\n", - *flock_count, *fcntl_count); + doutc(cl, "counted %d flock locks and %d fcntl locks\n", + *flock_count, *fcntl_count); } /* * Given a pointer to a lock, convert it to a ceph filelock */ -static int lock_to_ceph_filelock(struct file_lock *lock, +static int lock_to_ceph_filelock(struct inode *inode, + struct file_lock *lock, struct ceph_filelock *cephlock) { + struct ceph_client *cl = ceph_inode_to_client(inode); int err = 0; + cephlock->start = cpu_to_le64(lock->fl_start); cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); cephlock->client = cpu_to_le64(0); @@ -414,7 +422,7 @@ static int lock_to_ceph_filelock(struct file_lock *lock, cephlock->type = CEPH_LOCK_UNLOCK; break; default: - dout("Have unknown lock type %d\n", lock->fl_type); + doutc(cl, "Have unknown lock type %d\n", lock->fl_type); err = -EINVAL; } @@ -432,13 +440,14 @@ int ceph_encode_locks_to_buffer(struct inode *inode, { struct file_lock *lock; struct file_lock_context *ctx = locks_inode_context(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); int err = 0; int seen_fcntl = 0; int seen_flock = 0; int l = 0; - dout("encoding %d flock and %d fcntl locks\n", num_flock_locks, - num_fcntl_locks); + doutc(cl, "encoding %d flock and %d fcntl locks\n", num_flock_locks, + num_fcntl_locks); if (!ctx) return 0; @@ -450,7 +459,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode, err = -ENOSPC; goto fail; } - err = lock_to_ceph_filelock(lock, 
&flocks[l]); + err = lock_to_ceph_filelock(inode, lock, &flocks[l]); if (err) goto fail; ++l; @@ -461,7 +470,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode, err = -ENOSPC; goto fail; } - err = lock_to_ceph_filelock(lock, &flocks[l]); + err = lock_to_ceph_filelock(inode, lock, &flocks[l]); if (err) goto fail; ++l; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index de798444bb97..d95eb525519a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -12,6 +12,7 @@ #include <linux/bits.h> #include <linux/ktime.h> #include <linux/bitmap.h> +#include <linux/mnt_idmapping.h> #include "super.h" #include "mds_client.h" @@ -411,6 +412,7 @@ static int parse_reply_info_readdir(void **p, void *end, u64 features) { struct ceph_mds_reply_info_parsed *info = &req->r_reply_info; + struct ceph_client *cl = req->r_mdsc->fsc->client; u32 num, i = 0; int err; @@ -433,7 +435,7 @@ static int parse_reply_info_readdir(void **p, void *end, BUG_ON(!info->dir_entries); if ((unsigned long)(info->dir_entries + num) > (unsigned long)info->dir_entries + info->dir_buf_size) { - pr_err("dir contents are larger than expected\n"); + pr_err_client(cl, "dir contents are larger than expected\n"); WARN_ON(1); goto bad; } @@ -454,7 +456,7 @@ static int parse_reply_info_readdir(void **p, void *end, ceph_decode_need(p, end, _name_len, bad); _name = *p; *p += _name_len; - dout("parsed dir dname '%.*s'\n", _name_len, _name); + doutc(cl, "parsed dir dname '%.*s'\n", _name_len, _name); if (info->hash_order) rde->raw_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash, @@ -514,8 +516,8 @@ static int parse_reply_info_readdir(void **p, void *end, rde->is_nokey = false; err = ceph_fname_to_usr(&fname, &tname, &oname, &rde->is_nokey); if (err) { - pr_err("%s unable to decode %.*s, got %d\n", __func__, - _name_len, _name, err); + pr_err_client(cl, "unable to decode %.*s, got %d\n", + _name_len, _name, err); goto out_bad; } rde->name = oname.name; @@ -539,7 +541,7 @@ done: bad: err = -EIO; out_bad: - pr_err("problem parsing dir contents %d\n", err); + pr_err_client(cl, "problem parsing dir contents %d\n", err); return err; } @@ -570,10 +572,11 @@ bad: static int ceph_parse_deleg_inos(void **p, void *end, struct ceph_mds_session *s) { + struct ceph_client *cl = s->s_mdsc->fsc->client; u32 sets; ceph_decode_32_safe(p, end, sets, bad); - dout("got %u sets of delegated inodes\n", sets); + doutc(cl, "got %u sets of delegated inodes\n", sets); while (sets--) { u64 start, len; @@ -582,8 +585,9 @@ static int ceph_parse_deleg_inos(void **p, void *end, /* Don't accept a delegation of system inodes */ if (start < CEPH_INO_SYSTEM_BASE) { - pr_warn_ratelimited("ceph: ignoring reserved inode range delegation (start=0x%llx len=0x%llx)\n", - start, len); + pr_warn_ratelimited_client(cl, + "ignoring reserved inode range delegation (start=0x%llx len=0x%llx)\n", + start, len); continue; } while (len--) { @@ -591,10 +595,10 @@ static int ceph_parse_deleg_inos(void **p, void *end, DELEGATED_INO_AVAILABLE, GFP_KERNEL); if (!err) { - dout("added delegated inode 0x%llx\n", - start - 1); + doutc(cl, "added delegated inode 0x%llx\n", start - 1); } else if (err == -EBUSY) { - pr_warn("MDS delegated inode 0x%llx more than once.\n", + pr_warn_client(cl, + "MDS delegated inode 0x%llx more than once.\n", start - 1); } else { return err; @@ -744,6 +748,7 @@ static int parse_reply_info(struct ceph_mds_session *s, struct ceph_msg *msg, struct ceph_mds_request *req, u64 features) { struct ceph_mds_reply_info_parsed *info = &req->r_reply_info; + struct 
ceph_client *cl = s->s_mdsc->fsc->client; void *p, *end; u32 len; int err; @@ -783,7 +788,7 @@ static int parse_reply_info(struct ceph_mds_session *s, struct ceph_msg *msg, bad: err = -EIO; out_bad: - pr_err("mds parse_reply err %d\n", err); + pr_err_client(cl, "mds parse_reply err %d\n", err); ceph_msg_dump(msg); return err; } @@ -830,7 +835,8 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info) */ int ceph_wait_on_conflict_unlink(struct dentry *dentry) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dentry->d_sb); + struct ceph_client *cl = fsc->client; struct dentry *pdentry = dentry->d_parent; struct dentry *udentry, *found = NULL; struct ceph_dentry_info *di; @@ -855,8 +861,8 @@ int ceph_wait_on_conflict_unlink(struct dentry *dentry) goto next; if (!test_bit(CEPH_DENTRY_ASYNC_UNLINK_BIT, &di->flags)) - pr_warn("%s dentry %p:%pd async unlink bit is not set\n", - __func__, dentry, dentry); + pr_warn_client(cl, "dentry %p:%pd async unlink bit is not set\n", + dentry, dentry); if (!d_same_name(udentry, pdentry, &dname)) goto next; @@ -872,8 +878,8 @@ next: if (likely(!found)) return 0; - dout("%s dentry %p:%pd conflict with old %p:%pd\n", __func__, - dentry, dentry, found, found); + doutc(cl, "dentry %p:%pd conflict with old %p:%pd\n", dentry, dentry, + found, found); err = wait_on_bit(&di->flags, CEPH_DENTRY_ASYNC_UNLINK_BIT, TASK_KILLABLE); @@ -957,6 +963,7 @@ static int __verify_registered_session(struct ceph_mds_client *mdsc, static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, int mds) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_session *s; if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO) @@ -973,7 +980,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, int newmax = 1 << get_count_order(mds + 1); struct ceph_mds_session **sa; - dout("%s: realloc to %d\n", __func__, newmax); + doutc(cl, "realloc to %d\n", newmax); sa = kcalloc(newmax, sizeof(void *), GFP_NOFS); if (!sa) goto fail_realloc; @@ -986,7 +993,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, mdsc->max_sessions = newmax; } - dout("%s: mds%d\n", __func__, mds); + doutc(cl, "mds%d\n", mds); s->s_mdsc = mdsc; s->s_mds = mds; s->s_state = CEPH_MDS_SESSION_NEW; @@ -1029,7 +1036,7 @@ fail_realloc: static void __unregister_session(struct ceph_mds_client *mdsc, struct ceph_mds_session *s) { - dout("__unregister_session mds%d %p\n", s->s_mds, s); + doutc(mdsc->fsc->client, "mds%d %p\n", s->s_mds, s); BUG_ON(mdsc->sessions[s->s_mds] != s); mdsc->sessions[s->s_mds] = NULL; ceph_con_close(&s->s_con); @@ -1116,6 +1123,8 @@ void ceph_mdsc_release_request(struct kref *kref) kfree(req->r_path1); kfree(req->r_path2); put_cred(req->r_cred); + if (req->r_mnt_idmap) + mnt_idmap_put(req->r_mnt_idmap); if (req->r_pagelist) ceph_pagelist_release(req->r_pagelist); kfree(req->r_fscrypt_auth); @@ -1155,6 +1164,7 @@ static void __register_request(struct ceph_mds_client *mdsc, struct ceph_mds_request *req, struct inode *dir) { + struct ceph_client *cl = mdsc->fsc->client; int ret = 0; req->r_tid = ++mdsc->last_tid; @@ -1162,18 +1172,20 @@ static void __register_request(struct ceph_mds_client *mdsc, ret = ceph_reserve_caps(mdsc, &req->r_caps_reservation, req->r_num_caps); if (ret < 0) { - pr_err("__register_request %p " - "failed to reserve caps: %d\n", req, ret); + pr_err_client(cl, "%p failed to reserve caps: %d\n", + req, ret); /* set 
req->r_err to fail early from __do_request */ req->r_err = ret; return; } } - dout("__register_request %p tid %lld\n", req, req->r_tid); + doutc(cl, "%p tid %lld\n", req, req->r_tid); ceph_mdsc_get_request(req); insert_request(&mdsc->request_tree, req); req->r_cred = get_current_cred(); + if (!req->r_mnt_idmap) + req->r_mnt_idmap = &nop_mnt_idmap; if (mdsc->oldest_tid == 0 && req->r_op != CEPH_MDS_OP_SETFILELOCK) mdsc->oldest_tid = req->r_tid; @@ -1192,7 +1204,7 @@ static void __register_request(struct ceph_mds_client *mdsc, static void __unregister_request(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { - dout("__unregister_request %p tid %lld\n", req, req->r_tid); + doutc(mdsc->fsc->client, "%p tid %lld\n", req, req->r_tid); /* Never leave an unregistered request on an unsafe list! */ list_del_init(&req->r_unsafe_item); @@ -1278,6 +1290,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, int mds = -1; u32 hash = req->r_direct_hash; bool is_hash = test_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags); + struct ceph_client *cl = mdsc->fsc->client; if (random) *random = false; @@ -1289,8 +1302,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, if (req->r_resend_mds >= 0 && (__have_session(mdsc, req->r_resend_mds) || ceph_mdsmap_get_state(mdsc->mdsmap, req->r_resend_mds) > 0)) { - dout("%s using resend_mds mds%d\n", __func__, - req->r_resend_mds); + doutc(cl, "using resend_mds mds%d\n", req->r_resend_mds); return req->r_resend_mds; } @@ -1307,7 +1319,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc, rcu_read_lock(); inode = get_nonsnap_parent(req->r_dentry); rcu_read_unlock(); - dout("%s using snapdir's parent %p\n", __func__, inode); + doutc(cl, "using snapdir's parent %p %llx.%llx\n", + inode, ceph_vinop(inode)); } } else if (req->r_dentry) { /* ignore race with rename; old or new d_parent is okay */ @@ -1327,7 +1340,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc, /* direct snapped/virtual snapdir requests * based on parent dir inode */ inode = get_nonsnap_parent(parent); - dout("%s using nonsnap parent %p\n", __func__, inode); + doutc(cl, "using nonsnap parent %p %llx.%llx\n", + inode, ceph_vinop(inode)); } else { /* dentry target */ inode = d_inode(req->r_dentry); @@ -1343,10 +1357,11 @@ static int __choose_mds(struct ceph_mds_client *mdsc, rcu_read_unlock(); } - dout("%s %p is_hash=%d (0x%x) mode %d\n", __func__, inode, (int)is_hash, - hash, mode); if (!inode) goto random; + + doutc(cl, "%p %llx.%llx is_hash=%d (0x%x) mode %d\n", inode, + ceph_vinop(inode), (int)is_hash, hash, mode); ci = ceph_inode(inode); if (is_hash && S_ISDIR(inode->i_mode)) { @@ -1362,9 +1377,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc, get_random_bytes(&r, 1); r %= frag.ndist; mds = frag.dist[r]; - dout("%s %p %llx.%llx frag %u mds%d (%d/%d)\n", - __func__, inode, ceph_vinop(inode), - frag.frag, mds, (int)r, frag.ndist); + doutc(cl, "%p %llx.%llx frag %u mds%d (%d/%d)\n", + inode, ceph_vinop(inode), frag.frag, + mds, (int)r, frag.ndist); if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= CEPH_MDS_STATE_ACTIVE && !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds)) @@ -1377,9 +1392,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc, if (frag.mds >= 0) { /* choose auth mds */ mds = frag.mds; - dout("%s %p %llx.%llx frag %u mds%d (auth)\n", - __func__, inode, ceph_vinop(inode), - frag.frag, mds); + doutc(cl, "%p %llx.%llx frag %u mds%d (auth)\n", + inode, ceph_vinop(inode), frag.frag, mds); if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= CEPH_MDS_STATE_ACTIVE) 
{ if (!ceph_mdsmap_is_laggy(mdsc->mdsmap, @@ -1403,9 +1417,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc, goto random; } mds = cap->session->s_mds; - dout("%s %p %llx.%llx mds%d (%scap %p)\n", __func__, - inode, ceph_vinop(inode), mds, - cap == ci->i_auth_cap ? "auth " : "", cap); + doutc(cl, "%p %llx.%llx mds%d (%scap %p)\n", inode, + ceph_vinop(inode), mds, + cap == ci->i_auth_cap ? "auth " : "", cap); spin_unlock(&ci->i_ceph_lock); out: iput(inode); @@ -1416,7 +1430,7 @@ random: *random = true; mds = ceph_mdsmap_get_random_mds(mdsc->mdsmap); - dout("%s chose random mds%d\n", __func__, mds); + doutc(cl, "chose random mds%d\n", mds); return mds; } @@ -1529,6 +1543,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 int metadata_key_count = 0; struct ceph_options *opt = mdsc->fsc->client->options; struct ceph_mount_options *fsopt = mdsc->fsc->mount_options; + struct ceph_client *cl = mdsc->fsc->client; size_t size, count; void *p, *end; int ret; @@ -1567,7 +1582,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes, GFP_NOFS, false); if (!msg) { - pr_err("ENOMEM creating session open msg\n"); + pr_err_client(cl, "ENOMEM creating session open msg\n"); return ERR_PTR(-ENOMEM); } p = msg->front.iov_base; @@ -1607,14 +1622,14 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 ret = encode_supported_features(&p, end); if (ret) { - pr_err("encode_supported_features failed!\n"); + pr_err_client(cl, "encode_supported_features failed!\n"); ceph_msg_put(msg); return ERR_PTR(ret); } ret = encode_metric_spec(&p, end); if (ret) { - pr_err("encode_metric_spec failed!\n"); + pr_err_client(cl, "encode_metric_spec failed!\n"); ceph_msg_put(msg); return ERR_PTR(ret); } @@ -1642,8 +1657,8 @@ static int __open_session(struct ceph_mds_client *mdsc, /* wait for mds to go active? 
*/ mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds); - dout("open_session to mds%d (%s)\n", mds, - ceph_mds_state_name(mstate)); + doutc(mdsc->fsc->client, "open_session to mds%d (%s)\n", mds, + ceph_mds_state_name(mstate)); session->s_state = CEPH_MDS_SESSION_OPENING; session->s_renew_requested = jiffies; @@ -1686,8 +1701,9 @@ struct ceph_mds_session * ceph_mdsc_open_export_target_session(struct ceph_mds_client *mdsc, int target) { struct ceph_mds_session *session; + struct ceph_client *cl = mdsc->fsc->client; - dout("open_export_target_session to mds%d\n", target); + doutc(cl, "to mds%d\n", target); mutex_lock(&mdsc->mutex); session = __open_export_target_session(mdsc, target); @@ -1702,13 +1718,14 @@ static void __open_export_target_sessions(struct ceph_mds_client *mdsc, struct ceph_mds_info *mi; struct ceph_mds_session *ts; int i, mds = session->s_mds; + struct ceph_client *cl = mdsc->fsc->client; if (mds >= mdsc->mdsmap->possible_max_rank) return; mi = &mdsc->mdsmap->m_info[mds]; - dout("open_export_target_sessions for mds%d (%d targets)\n", - session->s_mds, mi->num_export_targets); + doutc(cl, "for mds%d (%d targets)\n", session->s_mds, + mi->num_export_targets); for (i = 0; i < mi->num_export_targets; i++) { ts = __open_export_target_session(mdsc, mi->export_targets[i]); @@ -1731,11 +1748,13 @@ void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc, static void detach_cap_releases(struct ceph_mds_session *session, struct list_head *target) { + struct ceph_client *cl = session->s_mdsc->fsc->client; + lockdep_assert_held(&session->s_cap_lock); list_splice_init(&session->s_cap_releases, target); session->s_num_cap_releases = 0; - dout("dispose_cap_releases mds%d\n", session->s_mds); + doutc(cl, "mds%d\n", session->s_mds); } static void dispose_cap_releases(struct ceph_mds_client *mdsc, @@ -1753,16 +1772,17 @@ static void dispose_cap_releases(struct ceph_mds_client *mdsc, static void cleanup_session_requests(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; struct rb_node *p; - dout("cleanup_session_requests mds%d\n", session->s_mds); + doutc(cl, "mds%d\n", session->s_mds); mutex_lock(&mdsc->mutex); while (!list_empty(&session->s_unsafe)) { req = list_first_entry(&session->s_unsafe, struct ceph_mds_request, r_unsafe_item); - pr_warn_ratelimited(" dropping unsafe request %llu\n", - req->r_tid); + pr_warn_ratelimited_client(cl, " dropping unsafe request %llu\n", + req->r_tid); if (req->r_target_inode) mapping_set_error(req->r_target_inode->i_mapping, -EIO); if (req->r_unsafe_dir) @@ -1791,13 +1811,14 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session, int (*cb)(struct inode *, int mds, void *), void *arg) { + struct ceph_client *cl = session->s_mdsc->fsc->client; struct list_head *p; struct ceph_cap *cap; struct inode *inode, *last_inode = NULL; struct ceph_cap *old_cap = NULL; int ret; - dout("iterate_session_caps %p mds%d\n", session, session->s_mds); + doutc(cl, "%p mds%d\n", session, session->s_mds); spin_lock(&session->s_cap_lock); p = session->s_caps.next; while (p != &session->s_caps) { @@ -1828,8 +1849,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session, spin_lock(&session->s_cap_lock); p = p->next; if (!cap->ci) { - dout("iterate_session_caps finishing cap %p removal\n", - cap); + doutc(cl, "finishing cap %p removal\n", cap); BUG_ON(cap->session != session); cap->session = NULL; list_del_init(&cap->session_caps); @@ -1858,6 +1878,7 @@ out: 
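The pattern in the hunks above is mechanical: each function derives a struct ceph_client *cl from whatever context it already holds (mdsc, session, or inode) and switches its dout()/pr_err()/pr_warn() calls to the client-aware doutc()/pr_*_client() variants, dropping the hand-rolled function-name prefixes at the same time (dynamic debug can decorate messages with the caller on its own). As a rough sketch of what the client-aware macros amount to, with the exact format string and field accesses being assumptions here (the real definitions live in include/linux/ceph/ceph_debug.h):

/*
 * Illustrative approximation: tag every message with the cluster fsid
 * and the client's global id, so logs from several mounts, possibly of
 * different clusters, can be told apart.
 */
#define doutc(client, fmt, ...)					\
	dout("[%pU %llu]: " fmt, &(client)->fsid,		\
	     (client)->monc.auth->global_id, ##__VA_ARGS__)

#define pr_err_client(client, fmt, ...)				\
	pr_err("[%pU %llu]: " fmt, &(client)->fsid,		\
	       (client)->monc.auth->global_id, ##__VA_ARGS__)

For the same reason many inode messages now print the vino via ceph_vinop(inode) ("%llx.%llx", ino plus snap id) next to the bare pointer: the pointer alone is ambiguous once more than one client is logging.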
static int remove_session_caps_cb(struct inode *inode, int mds, void *arg) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); bool invalidate = false; struct ceph_cap *cap; int iputs = 0; @@ -1865,8 +1886,8 @@ static int remove_session_caps_cb(struct inode *inode, int mds, void *arg) spin_lock(&ci->i_ceph_lock); cap = __get_cap_for_mds(ci, mds); if (cap) { - dout(" removing cap %p, ci is %p, inode is %p\n", - cap, ci, &ci->netfs.inode); + doutc(cl, " removing cap %p, ci is %p, inode is %p\n", + cap, ci, &ci->netfs.inode); iputs = ceph_purge_inode_cap(inode, cap, &invalidate); } @@ -1890,7 +1911,7 @@ static void remove_session_caps(struct ceph_mds_session *session) struct super_block *sb = fsc->sb; LIST_HEAD(dispose); - dout("remove_session_caps on %p\n", session); + doutc(fsc->client, "on %p\n", session); ceph_iterate_session_caps(session, remove_session_caps_cb, fsc); wake_up_all(&fsc->mdsc->cap_flushing_wq); @@ -1971,7 +1992,9 @@ static int wake_up_session_cb(struct inode *inode, int mds, void *arg) static void wake_up_session_caps(struct ceph_mds_session *session, int ev) { - dout("wake_up_session_caps %p mds%d\n", session, session->s_mds); + struct ceph_client *cl = session->s_mdsc->fsc->client; + + doutc(cl, "session %p mds%d\n", session, session->s_mds); ceph_iterate_session_caps(session, wake_up_session_cb, (void *)(unsigned long)ev); } @@ -1985,25 +2008,26 @@ static void wake_up_session_caps(struct ceph_mds_session *session, int ev) static int send_renew_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_msg *msg; int state; if (time_after_eq(jiffies, session->s_cap_ttl) && time_after_eq(session->s_cap_ttl, session->s_renew_requested)) - pr_info("mds%d caps stale\n", session->s_mds); + pr_info_client(cl, "mds%d caps stale\n", session->s_mds); session->s_renew_requested = jiffies; /* do not try to renew caps until a recovering mds has reconnected * with its clients. 
*/ state = ceph_mdsmap_get_state(mdsc->mdsmap, session->s_mds); if (state < CEPH_MDS_STATE_RECONNECT) { - dout("send_renew_caps ignoring mds%d (%s)\n", - session->s_mds, ceph_mds_state_name(state)); + doutc(cl, "ignoring mds%d (%s)\n", session->s_mds, + ceph_mds_state_name(state)); return 0; } - dout("send_renew_caps to mds%d (%s)\n", session->s_mds, - ceph_mds_state_name(state)); + doutc(cl, "to mds%d (%s)\n", session->s_mds, + ceph_mds_state_name(state)); msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, ++session->s_renew_seq); if (!msg) @@ -2015,10 +2039,11 @@ static int send_renew_caps(struct ceph_mds_client *mdsc, static int send_flushmsg_ack(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, u64 seq) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_msg *msg; - dout("send_flushmsg_ack to mds%d (%s)s seq %lld\n", - session->s_mds, ceph_session_state_name(session->s_state), seq); + doutc(cl, "to mds%d (%s) seq %lld\n", session->s_mds, + ceph_session_state_name(session->s_state), seq); msg = ceph_create_session_msg(CEPH_SESSION_FLUSHMSG_ACK, seq); if (!msg) return -ENOMEM; @@ -2035,6 +2060,7 @@ static int send_flushmsg_ack(struct ceph_mds_client *mdsc, static void renewed_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, int is_renew) { + struct ceph_client *cl = mdsc->fsc->client; int was_stale; int wake = 0; @@ -2046,15 +2072,17 @@ static void renewed_caps(struct ceph_mds_client *mdsc, if (was_stale) { if (time_before(jiffies, session->s_cap_ttl)) { - pr_info("mds%d caps renewed\n", session->s_mds); + pr_info_client(cl, "mds%d caps renewed\n", + session->s_mds); wake = 1; } else { - pr_info("mds%d caps still stale\n", session->s_mds); + pr_info_client(cl, "mds%d caps still stale\n", + session->s_mds); } } - dout("renewed_caps mds%d ttl now %lu, was %s, now %s\n", - session->s_mds, session->s_cap_ttl, was_stale ? "stale" : "fresh", - time_before(jiffies, session->s_cap_ttl) ? "stale" : "fresh"); + doutc(cl, "mds%d ttl now %lu, was %s, now %s\n", session->s_mds, + session->s_cap_ttl, was_stale ? "stale" : "fresh", + time_before(jiffies, session->s_cap_ttl) ?
"stale" : "fresh"); spin_unlock(&session->s_cap_lock); if (wake) @@ -2066,11 +2094,11 @@ static void renewed_caps(struct ceph_mds_client *mdsc, */ static int request_close_session(struct ceph_mds_session *session) { + struct ceph_client *cl = session->s_mdsc->fsc->client; struct ceph_msg *msg; - dout("request_close_session mds%d state %s seq %lld\n", - session->s_mds, ceph_session_state_name(session->s_state), - session->s_seq); + doutc(cl, "mds%d state %s seq %lld\n", session->s_mds, + ceph_session_state_name(session->s_state), session->s_seq); msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_seq); if (!msg) @@ -2126,6 +2154,8 @@ out: */ static int trim_caps_cb(struct inode *inode, int mds, void *arg) { + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = mdsc->fsc->client; int *remaining = arg; struct ceph_inode_info *ci = ceph_inode(inode); int used, wanted, oissued, mine; @@ -2145,9 +2175,10 @@ static int trim_caps_cb(struct inode *inode, int mds, void *arg) wanted = __ceph_caps_file_wanted(ci); oissued = __ceph_caps_issued_other(ci, cap); - dout("trim_caps_cb %p cap %p mine %s oissued %s used %s wanted %s\n", - inode, cap, ceph_cap_string(mine), ceph_cap_string(oissued), - ceph_cap_string(used), ceph_cap_string(wanted)); + doutc(cl, "%p %llx.%llx cap %p mine %s oissued %s used %s wanted %s\n", + inode, ceph_vinop(inode), cap, ceph_cap_string(mine), + ceph_cap_string(oissued), ceph_cap_string(used), + ceph_cap_string(wanted)); if (cap == ci->i_auth_cap) { if (ci->i_dirty_caps || ci->i_flushing_caps || !list_empty(&ci->i_cap_snaps)) @@ -2173,7 +2204,7 @@ static int trim_caps_cb(struct inode *inode, int mds, void *arg) if (oissued) { /* we aren't the only cap.. just remove us */ - ceph_remove_cap(cap, true); + ceph_remove_cap(mdsc, cap, true); (*remaining)--; } else { struct dentry *dentry; @@ -2187,8 +2218,8 @@ static int trim_caps_cb(struct inode *inode, int mds, void *arg) count = atomic_read(&inode->i_count); if (count == 1) (*remaining)--; - dout("trim_caps_cb %p cap %p pruned, count now %d\n", - inode, cap, count); + doutc(cl, "%p %llx.%llx cap %p pruned, count now %d\n", + inode, ceph_vinop(inode), cap, count); } else { dput(dentry); } @@ -2207,17 +2238,18 @@ int ceph_trim_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, int max_caps) { + struct ceph_client *cl = mdsc->fsc->client; int trim_caps = session->s_nr_caps - max_caps; - dout("trim_caps mds%d start: %d / %d, trim %d\n", - session->s_mds, session->s_nr_caps, max_caps, trim_caps); + doutc(cl, "mds%d start: %d / %d, trim %d\n", session->s_mds, + session->s_nr_caps, max_caps, trim_caps); if (trim_caps > 0) { int remaining = trim_caps; ceph_iterate_session_caps(session, trim_caps_cb, &remaining); - dout("trim_caps mds%d done: %d / %d, trimmed %d\n", - session->s_mds, session->s_nr_caps, max_caps, - trim_caps - remaining); + doutc(cl, "mds%d done: %d / %d, trimmed %d\n", + session->s_mds, session->s_nr_caps, max_caps, + trim_caps - remaining); } ceph_flush_cap_releases(mdsc, session); @@ -2227,6 +2259,7 @@ int ceph_trim_caps(struct ceph_mds_client *mdsc, static int check_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_tid) { + struct ceph_client *cl = mdsc->fsc->client; int ret = 1; spin_lock(&mdsc->cap_dirty_lock); @@ -2235,8 +2268,8 @@ static int check_caps_flush(struct ceph_mds_client *mdsc, list_first_entry(&mdsc->cap_flush_list, struct ceph_cap_flush, g_list); if (cf->tid <= want_flush_tid) { - dout("check_caps_flush still flushing tid " - 
"%llu <= %llu\n", cf->tid, want_flush_tid); + doutc(cl, "still flushing tid %llu <= %llu\n", + cf->tid, want_flush_tid); ret = 0; } } @@ -2252,12 +2285,14 @@ static int check_caps_flush(struct ceph_mds_client *mdsc, static void wait_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_tid) { - dout("check_caps_flush want %llu\n", want_flush_tid); + struct ceph_client *cl = mdsc->fsc->client; + + doutc(cl, "want %llu\n", want_flush_tid); wait_event(mdsc->cap_flushing_wq, check_caps_flush(mdsc, want_flush_tid)); - dout("check_caps_flush ok, flushed thru %llu\n", want_flush_tid); + doutc(cl, "ok, flushed thru %llu\n", want_flush_tid); } /* @@ -2266,6 +2301,7 @@ static void wait_caps_flush(struct ceph_mds_client *mdsc, static void ceph_send_cap_releases(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_msg *msg = NULL; struct ceph_mds_cap_release *head; struct ceph_mds_cap_item *item; @@ -2324,7 +2360,7 @@ again: msg->front.iov_len += sizeof(*cap_barrier); msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); - dout("send_cap_releases mds%d %p\n", session->s_mds, msg); + doutc(cl, "mds%d %p\n", session->s_mds, msg); ceph_con_send(&session->s_con, msg); msg = NULL; } @@ -2344,13 +2380,13 @@ again: msg->front.iov_len += sizeof(*cap_barrier); msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); - dout("send_cap_releases mds%d %p\n", session->s_mds, msg); + doutc(cl, "mds%d %p\n", session->s_mds, msg); ceph_con_send(&session->s_con, msg); } return; out_err: - pr_err("send_cap_releases mds%d, failed to allocate message\n", - session->s_mds); + pr_err_client(cl, "mds%d, failed to allocate message\n", + session->s_mds); spin_lock(&session->s_cap_lock); list_splice(&tmp_list, &session->s_cap_releases); session->s_num_cap_releases += num_cap_releases; @@ -2373,16 +2409,17 @@ static void ceph_cap_release_work(struct work_struct *work) void ceph_flush_cap_releases(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { + struct ceph_client *cl = mdsc->fsc->client; if (mdsc->stopping) return; ceph_get_mds_session(session); if (queue_work(mdsc->fsc->cap_wq, &session->s_cap_release_work)) { - dout("cap release work queued\n"); + doutc(cl, "cap release work queued\n"); } else { ceph_put_mds_session(session); - dout("failed to queue cap release work\n"); + doutc(cl, "failed to queue cap release work\n"); } } @@ -2410,13 +2447,14 @@ static void ceph_cap_reclaim_work(struct work_struct *work) void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; if (mdsc->stopping) return; if (queue_work(mdsc->fsc->cap_wq, &mdsc->cap_reclaim_work)) { - dout("caps reclaim work queued\n"); + doutc(cl, "caps reclaim work queued\n"); } else { - dout("failed to queue caps release work\n"); + doutc(cl, "failed to queue caps release work\n"); } } @@ -2588,6 +2626,7 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen) /** * ceph_mdsc_build_path - build a path string to a given dentry + * @mdsc: mds client * @dentry: dentry to which path should be built * @plen: returned length of string * @pbase: returned base inode number @@ -2607,9 +2646,10 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen) * Encode hidden .snap dirs as a double /, i.e. 
* foo/.snap/bar -> foo//bar */ -char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *pbase, - int for_wire) +char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, struct dentry *dentry, + int *plen, u64 *pbase, int for_wire) { + struct ceph_client *cl = mdsc->fsc->client; struct dentry *cur; struct inode *inode; char *path; @@ -2635,8 +2675,7 @@ retry: spin_lock(&cur->d_lock); inode = d_inode(cur); if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { - dout("build_path path+%d: %p SNAPDIR\n", - pos, cur); + doutc(cl, "path+%d: %p SNAPDIR\n", pos, cur); spin_unlock(&cur->d_lock); parent = dget_parent(cur); } else if (for_wire && inode && dentry != cur && @@ -2714,21 +2753,21 @@ retry: * A rename didn't occur, but somehow we didn't end up where * we thought we would. Throw a warning and try again. */ - pr_warn("build_path did not end path lookup where expected (pos = %d)\n", - pos); + pr_warn_client(cl, "did not end path lookup where expected (pos = %d)\n", + pos); goto retry; } *pbase = base; *plen = PATH_MAX - 1 - pos; - dout("build_path on %p %d built %llx '%.*s'\n", - dentry, d_count(dentry), base, *plen, path + pos); + doutc(cl, "on %p %d built %llx '%.*s'\n", dentry, d_count(dentry), + base, *plen, path + pos); return path + pos; } -static int build_dentry_path(struct dentry *dentry, struct inode *dir, - const char **ppath, int *ppathlen, u64 *pino, - bool *pfreepath, bool parent_locked) +static int build_dentry_path(struct ceph_mds_client *mdsc, struct dentry *dentry, + struct inode *dir, const char **ppath, int *ppathlen, + u64 *pino, bool *pfreepath, bool parent_locked) { char *path; @@ -2744,7 +2783,7 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir, return 0; } rcu_read_unlock(); - path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1); + path = ceph_mdsc_build_path(mdsc, dentry, ppathlen, pino, 1); if (IS_ERR(path)) return PTR_ERR(path); *ppath = path; @@ -2756,6 +2795,7 @@ static int build_inode_path(struct inode *inode, const char **ppath, int *ppathlen, u64 *pino, bool *pfreepath) { + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); struct dentry *dentry; char *path; @@ -2765,7 +2805,7 @@ static int build_inode_path(struct inode *inode, return 0; } dentry = d_find_alias(inode); - path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1); + path = ceph_mdsc_build_path(mdsc, dentry, ppathlen, pino, 1); dput(dentry); if (IS_ERR(path)) return PTR_ERR(path); @@ -2778,27 +2818,28 @@ static int build_inode_path(struct inode *inode, * request arguments may be specified via an inode *, a dentry *, or * an explicit ino+path. 
*/ -static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, - struct inode *rdiri, const char *rpath, - u64 rino, const char **ppath, int *pathlen, - u64 *ino, bool *freepath, bool parent_locked) +static int set_request_path_attr(struct ceph_mds_client *mdsc, struct inode *rinode, + struct dentry *rdentry, struct inode *rdiri, + const char *rpath, u64 rino, const char **ppath, + int *pathlen, u64 *ino, bool *freepath, + bool parent_locked) { + struct ceph_client *cl = mdsc->fsc->client; int r = 0; if (rinode) { r = build_inode_path(rinode, ppath, pathlen, ino, freepath); - dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode), - ceph_snap(rinode)); + doutc(cl, " inode %p %llx.%llx\n", rinode, ceph_ino(rinode), + ceph_snap(rinode)); } else if (rdentry) { - r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino, + r = build_dentry_path(mdsc, rdentry, rdiri, ppath, pathlen, ino, freepath, parent_locked); - dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, - *ppath); + doutc(cl, " dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, *ppath); } else if (rpath || rino) { *ino = rino; *ppath = rpath; *pathlen = rpath ? strlen(rpath) : 0; - dout(" path %.*s\n", *pathlen, rpath); + doutc(cl, " path %.*s\n", *pathlen, rpath); } return r; @@ -2840,6 +2881,17 @@ static void encode_mclientrequest_tail(void **p, } } +static inline u16 mds_supported_head_version(struct ceph_mds_session *session) +{ + if (!test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD, &session->s_features)) + return 1; + + if (!test_bit(CEPHFS_FEATURE_HAS_OWNER_UIDGID, &session->s_features)) + return 2; + + return CEPH_MDS_REQUEST_HEAD_VERSION; +} + static struct ceph_mds_request_head_legacy * find_legacy_request_head(void *p, u64 features) { @@ -2861,6 +2913,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, { int mds = session->s_mds; struct ceph_mds_client *mdsc = session->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_msg *msg; struct ceph_mds_request_head_legacy *lhead; const char *path1 = NULL; @@ -2874,10 +2927,11 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, void *p, *end; int ret; bool legacy = !(session->s_con.peer_features & CEPH_FEATURE_FS_BTIME); - bool old_version = !test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD, - &session->s_features); + u16 request_head_version = mds_supported_head_version(session); + kuid_t caller_fsuid = req->r_cred->fsuid; + kgid_t caller_fsgid = req->r_cred->fsgid; - ret = set_request_path_attr(req->r_inode, req->r_dentry, + ret = set_request_path_attr(mdsc, req->r_inode, req->r_dentry, req->r_parent, req->r_path1, req->r_ino1.ino, &path1, &pathlen1, &ino1, &freepath1, test_bit(CEPH_MDS_R_PARENT_LOCKED, @@ -2891,7 +2945,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, if (req->r_old_dentry && !(req->r_old_dentry->d_flags & DCACHE_DISCONNECTED)) old_dentry = req->r_old_dentry; - ret = set_request_path_attr(NULL, old_dentry, + ret = set_request_path_attr(mdsc, NULL, old_dentry, req->r_old_dentry_dir, req->r_path2, req->r_ino2.ino, &path2, &pathlen2, &ino2, &freepath2, true); @@ -2916,8 +2970,10 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, */ if (legacy) len = sizeof(struct ceph_mds_request_head_legacy); - else if (old_version) + else if (request_head_version == 1) len = sizeof(struct ceph_mds_request_head_old); + else if (request_head_version == 2) + len = offsetofend(struct ceph_mds_request_head, ext_num_fwd); else 
len = sizeof(struct ceph_mds_request_head); @@ -2967,6 +3023,30 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, lhead = find_legacy_request_head(msg->front.iov_base, session->s_con.peer_features); + if ((req->r_mnt_idmap != &nop_mnt_idmap) && + !test_bit(CEPHFS_FEATURE_HAS_OWNER_UIDGID, &session->s_features)) { + WARN_ON_ONCE(!IS_CEPH_MDS_OP_NEWINODE(req->r_op)); + + if (enable_unsafe_idmap) { + pr_warn_once_client(cl, + "idmapped mount is used and CEPHFS_FEATURE_HAS_OWNER_UIDGID" + " is not supported by MDS. UID/GID-based restrictions may" + " not work properly.\n"); + + caller_fsuid = from_vfsuid(req->r_mnt_idmap, &init_user_ns, + VFSUIDT_INIT(req->r_cred->fsuid)); + caller_fsgid = from_vfsgid(req->r_mnt_idmap, &init_user_ns, + VFSGIDT_INIT(req->r_cred->fsgid)); + } else { + pr_err_ratelimited_client(cl, + "idmapped mount is used and CEPHFS_FEATURE_HAS_OWNER_UIDGID" + " is not supported by MDS. Fail request with -EIO.\n"); + + ret = -EIO; + goto out_err; + } + } + /* * The ceph_mds_request_head_legacy didn't contain a version field, and * one was added when we moved the message version from 3->4. @@ -2974,17 +3054,40 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, if (legacy) { msg->hdr.version = cpu_to_le16(3); p = msg->front.iov_base + sizeof(*lhead); - } else if (old_version) { + } else if (request_head_version == 1) { struct ceph_mds_request_head_old *ohead = msg->front.iov_base; msg->hdr.version = cpu_to_le16(4); ohead->version = cpu_to_le16(1); p = msg->front.iov_base + sizeof(*ohead); + } else if (request_head_version == 2) { + struct ceph_mds_request_head *nhead = msg->front.iov_base; + + msg->hdr.version = cpu_to_le16(6); + nhead->version = cpu_to_le16(2); + + p = msg->front.iov_base + offsetofend(struct ceph_mds_request_head, ext_num_fwd); } else { struct ceph_mds_request_head *nhead = msg->front.iov_base; + kuid_t owner_fsuid; + kgid_t owner_fsgid; msg->hdr.version = cpu_to_le16(6); nhead->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION); + nhead->struct_len = cpu_to_le32(sizeof(struct ceph_mds_request_head)); + + if (IS_CEPH_MDS_OP_NEWINODE(req->r_op)) { + owner_fsuid = from_vfsuid(req->r_mnt_idmap, &init_user_ns, + VFSUIDT_INIT(req->r_cred->fsuid)); + owner_fsgid = from_vfsgid(req->r_mnt_idmap, &init_user_ns, + VFSGIDT_INIT(req->r_cred->fsgid)); + nhead->owner_uid = cpu_to_le32(from_kuid(&init_user_ns, owner_fsuid)); + nhead->owner_gid = cpu_to_le32(from_kgid(&init_user_ns, owner_fsgid)); + } else { + nhead->owner_uid = cpu_to_le32(-1); + nhead->owner_gid = cpu_to_le32(-1); + } + p = msg->front.iov_base + sizeof(*nhead); } @@ -2993,9 +3096,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, lhead->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); lhead->op = cpu_to_le32(req->r_op); lhead->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, - req->r_cred->fsuid)); + caller_fsuid)); lhead->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, - req->r_cred->fsgid)); + caller_fsgid)); lhead->ino = cpu_to_le64(req->r_deleg_ino); lhead->args = req->r_args; @@ -3099,6 +3202,7 @@ static int __prepare_send_request(struct ceph_mds_session *session, { int mds = session->s_mds; struct ceph_mds_client *mdsc = session->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request_head_legacy *lhead; struct ceph_mds_request_head *nhead; struct ceph_msg *msg; @@ -3117,8 +3221,8 @@ static int __prepare_send_request(struct ceph_mds_session *session, old_max_retry = 1 << 
(old_max_retry * BITS_PER_BYTE); if ((old_version && req->r_attempts >= old_max_retry) || ((uint32_t)req->r_attempts >= U32_MAX)) { - pr_warn_ratelimited("%s request tid %llu seq overflow\n", - __func__, req->r_tid); + pr_warn_ratelimited_client(cl, "request tid %llu seq overflow\n", + req->r_tid); return -EMULTIHOP; } } @@ -3133,8 +3237,8 @@ static int __prepare_send_request(struct ceph_mds_session *session, else req->r_sent_on_mseq = -1; } - dout("%s %p tid %lld %s (attempt %d)\n", __func__, req, - req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); + doutc(cl, "%p tid %lld %s (attempt %d)\n", req, req->r_tid, + ceph_mds_op_name(req->r_op), req->r_attempts); if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { void *p; @@ -3202,7 +3306,7 @@ static int __prepare_send_request(struct ceph_mds_session *session, nhead->ext_num_retry = cpu_to_le32(req->r_attempts - 1); } - dout(" r_parent = %p\n", req->r_parent); + doutc(cl, " r_parent = %p\n", req->r_parent); return 0; } @@ -3230,6 +3334,7 @@ static int __send_request(struct ceph_mds_session *session, static void __do_request(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_session *session = NULL; int mds = -1; int err = 0; @@ -3242,29 +3347,29 @@ static void __do_request(struct ceph_mds_client *mdsc, } if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO) { - dout("do_request metadata corrupted\n"); + doutc(cl, "metadata corrupted\n"); err = -EIO; goto finish; } if (req->r_timeout && time_after_eq(jiffies, req->r_started + req->r_timeout)) { - dout("do_request timed out\n"); + doutc(cl, "timed out\n"); err = -ETIMEDOUT; goto finish; } if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { - dout("do_request forced umount\n"); + doutc(cl, "forced umount\n"); err = -EIO; goto finish; } if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) { if (mdsc->mdsmap_err) { err = mdsc->mdsmap_err; - dout("do_request mdsmap err %d\n", err); + doutc(cl, "mdsmap err %d\n", err); goto finish; } if (mdsc->mdsmap->m_epoch == 0) { - dout("do_request no mdsmap, waiting for map\n"); + doutc(cl, "no mdsmap, waiting for map\n"); list_add(&req->r_wait, &mdsc->waiting_for_map); return; } @@ -3285,7 +3390,7 @@ static void __do_request(struct ceph_mds_client *mdsc, err = -EJUKEBOX; goto finish; } - dout("do_request no mds or not active, waiting for map\n"); + doutc(cl, "no mds or not active, waiting for map\n"); list_add(&req->r_wait, &mdsc->waiting_for_map); return; } @@ -3301,8 +3406,8 @@ static void __do_request(struct ceph_mds_client *mdsc, } req->r_session = ceph_get_mds_session(session); - dout("do_request mds%d session %p state %s\n", mds, session, - ceph_session_state_name(session->s_state)); + doutc(cl, "mds%d session %p state %s\n", mds, session, + ceph_session_state_name(session->s_state)); /* * The old ceph will crash the MDSs when see unknown OPs @@ -3393,8 +3498,8 @@ static void __do_request(struct ceph_mds_client *mdsc, spin_lock(&ci->i_ceph_lock); cap = ci->i_auth_cap; if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE && mds != cap->mds) { - dout("do_request session changed for auth cap %d -> %d\n", - cap->session->s_mds, session->s_mds); + doutc(cl, "session changed for auth cap %d -> %d\n", + cap->session->s_mds, session->s_mds); /* Remove the auth cap from old session */ spin_lock(&cap->session->s_cap_lock); @@ -3421,7 +3526,7 @@ out_session: ceph_put_mds_session(session); finish: if (err) { - dout("__do_request early error %d\n", err); + 
doutc(cl, "early error %d\n", err); req->r_err = err; complete_request(mdsc, req); __unregister_request(mdsc, req); @@ -3435,6 +3540,7 @@ finish: static void __wake_requests(struct ceph_mds_client *mdsc, struct list_head *head) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; LIST_HEAD(tmp_list); @@ -3444,7 +3550,8 @@ static void __wake_requests(struct ceph_mds_client *mdsc, req = list_entry(tmp_list.next, struct ceph_mds_request, r_wait); list_del_init(&req->r_wait); - dout(" wake request %p tid %llu\n", req, req->r_tid); + doutc(cl, " wake request %p tid %llu\n", req, + req->r_tid); __do_request(mdsc, req); } } @@ -3455,10 +3562,11 @@ static void __wake_requests(struct ceph_mds_client *mdsc, */ static void kick_requests(struct ceph_mds_client *mdsc, int mds) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; struct rb_node *p = rb_first(&mdsc->request_tree); - dout("kick_requests mds%d\n", mds); + doutc(cl, "kick_requests mds%d\n", mds); while (p) { req = rb_entry(p, struct ceph_mds_request, r_node); p = rb_next(p); @@ -3468,7 +3576,7 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds) continue; /* only new requests */ if (req->r_session && req->r_session->s_mds == mds) { - dout(" kicking tid %llu\n", req->r_tid); + doutc(cl, " kicking tid %llu\n", req->r_tid); list_del_init(&req->r_wait); __do_request(mdsc, req); } @@ -3478,6 +3586,7 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds) int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir, struct ceph_mds_request *req) { + struct ceph_client *cl = mdsc->fsc->client; int err = 0; /* take CAP_PIN refs for r_inode, r_parent, r_old_dentry */ @@ -3499,8 +3608,7 @@ int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir, if (req->r_inode) { err = ceph_wait_on_async_create(req->r_inode); if (err) { - dout("%s: wait for async create returned: %d\n", - __func__, err); + doutc(cl, "wait for async create returned: %d\n", err); return err; } } @@ -3508,13 +3616,12 @@ int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir, if (!err && req->r_old_inode) { err = ceph_wait_on_async_create(req->r_old_inode); if (err) { - dout("%s: wait for async create returned: %d\n", - __func__, err); + doutc(cl, "wait for async create returned: %d\n", err); return err; } } - dout("submit_request on %p for inode %p\n", req, dir); + doutc(cl, "submit_request on %p for inode %p\n", req, dir); mutex_lock(&mdsc->mutex); __register_request(mdsc, req, dir); __do_request(mdsc, req); @@ -3527,10 +3634,11 @@ int ceph_mdsc_wait_request(struct ceph_mds_client *mdsc, struct ceph_mds_request *req, ceph_mds_request_wait_callback_t wait_func) { + struct ceph_client *cl = mdsc->fsc->client; int err; /* wait */ - dout("do_request waiting\n"); + doutc(cl, "do_request waiting\n"); if (wait_func) { err = wait_func(mdsc, req); } else { @@ -3544,14 +3652,14 @@ int ceph_mdsc_wait_request(struct ceph_mds_client *mdsc, else err = timeleft; /* killed */ } - dout("do_request waited, got %d\n", err); + doutc(cl, "do_request waited, got %d\n", err); mutex_lock(&mdsc->mutex); /* only abort if we didn't race with a real reply */ if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) { err = le32_to_cpu(req->r_reply_info.head->result); } else if (err < 0) { - dout("aborted request %lld with %d\n", req->r_tid, err); + doutc(cl, "aborted request %lld with %d\n", req->r_tid, err); /* * ensure we aren't running concurrently with @@ -3582,15 +3690,16 @@ 
int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, struct inode *dir, struct ceph_mds_request *req) { + struct ceph_client *cl = mdsc->fsc->client; int err; - dout("do_request on %p\n", req); + doutc(cl, "do_request on %p\n", req); /* issue */ err = ceph_mdsc_submit_request(mdsc, dir, req); if (!err) err = ceph_mdsc_wait_request(mdsc, req, NULL); - dout("do_request %p done, result %d\n", req, err); + doutc(cl, "do_request %p done, result %d\n", req, err); return err; } @@ -3602,8 +3711,10 @@ void ceph_invalidate_dir_request(struct ceph_mds_request *req) { struct inode *dir = req->r_parent; struct inode *old_dir = req->r_old_dentry_dir; + struct ceph_client *cl = req->r_mdsc->fsc->client; - dout("invalidate_dir_request %p %p (complete, lease(s))\n", dir, old_dir); + doutc(cl, "invalidate_dir_request %p %p (complete, lease(s))\n", + dir, old_dir); ceph_dir_clear_complete(dir); if (old_dir) @@ -3624,6 +3735,7 @@ void ceph_invalidate_dir_request(struct ceph_mds_request *req) static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) { struct ceph_mds_client *mdsc = session->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; struct ceph_mds_reply_head *head = msg->front.iov_base; struct ceph_mds_reply_info_parsed *rinfo; /* parsed reply info */ @@ -3634,7 +3746,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) bool close_sessions = false; if (msg->front.iov_len < sizeof(*head)) { - pr_err("mdsc_handle_reply got corrupt (short) reply\n"); + pr_err_client(cl, "got corrupt (short) reply\n"); ceph_msg_dump(msg); return; } @@ -3644,17 +3756,17 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) mutex_lock(&mdsc->mutex); req = lookup_get_request(mdsc, tid); if (!req) { - dout("handle_reply on unknown tid %llu\n", tid); + doutc(cl, "on unknown tid %llu\n", tid); mutex_unlock(&mdsc->mutex); return; } - dout("handle_reply %p\n", req); + doutc(cl, "handle_reply %p\n", req); /* correct session? */ if (req->r_session != session) { - pr_err("mdsc_handle_reply got %llu on session mds%d" - " not mds%d\n", tid, session->s_mds, - req->r_session ? req->r_session->s_mds : -1); + pr_err_client(cl, "got %llu on session mds%d not mds%d\n", + tid, session->s_mds, + req->r_session ? req->r_session->s_mds : -1); mutex_unlock(&mdsc->mutex); goto out; } @@ -3662,14 +3774,14 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) /* dup? */ if ((test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags) && !head->safe) || (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags) && head->safe)) { - pr_warn("got a dup %s reply on %llu from mds%d\n", - head->safe ? "safe" : "unsafe", tid, mds); + pr_warn_client(cl, "got a dup %s reply on %llu from mds%d\n", + head->safe ? "safe" : "unsafe", tid, mds); mutex_unlock(&mdsc->mutex); goto out; } if (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags)) { - pr_warn("got unsafe after safe on %llu from mds%d\n", - tid, mds); + pr_warn_client(cl, "got unsafe after safe on %llu from mds%d\n", + tid, mds); mutex_unlock(&mdsc->mutex); goto out; } @@ -3692,7 +3804,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) * response. And even if it did, there is nothing * useful we could do with a revised return value. 
*/ - dout("got safe reply %llu, mds%d\n", tid, mds); + doutc(cl, "got safe reply %llu, mds%d\n", tid, mds); mutex_unlock(&mdsc->mutex); goto out; @@ -3702,7 +3814,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe); } - dout("handle_reply tid %lld result %d\n", tid, result); + doutc(cl, "tid %lld result %d\n", tid, result); if (test_bit(CEPHFS_FEATURE_REPLY_ENCODING, &session->s_features)) err = parse_reply_info(session, msg, req, (u64)-1); else @@ -3742,7 +3854,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) mutex_lock(&session->s_mutex); if (err < 0) { - pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid); + pr_err_client(cl, "got corrupt reply mds%d(tid:%lld)\n", + mds, tid); ceph_msg_dump(msg); goto out_err; } @@ -3806,7 +3919,7 @@ out_err: set_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags); } } else { - dout("reply arrived after request %lld was aborted\n", tid); + doutc(cl, "reply arrived after request %lld was aborted\n", tid); } mutex_unlock(&mdsc->mutex); @@ -3835,6 +3948,7 @@ static void handle_forward(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, struct ceph_msg *msg) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; u64 tid = le64_to_cpu(msg->hdr.tid); u32 next_mds; @@ -3852,12 +3966,12 @@ static void handle_forward(struct ceph_mds_client *mdsc, req = lookup_get_request(mdsc, tid); if (!req) { mutex_unlock(&mdsc->mutex); - dout("forward tid %llu to mds%d - req dne\n", tid, next_mds); + doutc(cl, "forward tid %llu to mds%d - req dne\n", tid, next_mds); return; /* dup reply? */ } if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { - dout("forward tid %llu aborted, unregistering\n", tid); + doutc(cl, "forward tid %llu aborted, unregistering\n", tid); __unregister_request(mdsc, req); } else if (fwd_seq <= req->r_num_fwd || (uint32_t)fwd_seq >= U32_MAX) { /* @@ -3873,10 +3987,11 @@ static void handle_forward(struct ceph_mds_client *mdsc, set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags); mutex_unlock(&req->r_fill_mutex); aborted = true; - pr_warn_ratelimited("forward tid %llu seq overflow\n", tid); + pr_warn_ratelimited_client(cl, "forward tid %llu seq overflow\n", + tid); } else { /* resend. 
forward race not possible; mds would drop */ - dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds); + doutc(cl, "forward tid %llu to mds%d (we resend)\n", tid, next_mds); BUG_ON(req->r_err); BUG_ON(test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)); req->r_attempts = 0; @@ -3894,7 +4009,7 @@ static void handle_forward(struct ceph_mds_client *mdsc, return; bad: - pr_err("mdsc_handle_forward decode error err=%d\n", err); + pr_err_client(cl, "decode error err=%d\n", err); ceph_msg_dump(msg); } @@ -3933,6 +4048,7 @@ static void handle_session(struct ceph_mds_session *session, struct ceph_msg *msg) { struct ceph_mds_client *mdsc = session->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; int mds = session->s_mds; int msg_version = le16_to_cpu(msg->hdr.version); void *p = msg->front.iov_base; @@ -3980,7 +4096,8 @@ static void handle_session(struct ceph_mds_session *session, /* version >= 5, flags */ ceph_decode_32_safe(&p, end, flags, bad); if (flags & CEPH_SESSION_BLOCKLISTED) { - pr_warn("mds%d session blocklisted\n", session->s_mds); + pr_warn_client(cl, "mds%d session blocklisted\n", + session->s_mds); blocklisted = true; } } @@ -3996,22 +4113,24 @@ static void handle_session(struct ceph_mds_session *session, mutex_lock(&session->s_mutex); - dout("handle_session mds%d %s %p state %s seq %llu\n", - mds, ceph_session_op_name(op), session, - ceph_session_state_name(session->s_state), seq); + doutc(cl, "mds%d %s %p state %s seq %llu\n", mds, + ceph_session_op_name(op), session, + ceph_session_state_name(session->s_state), seq); if (session->s_state == CEPH_MDS_SESSION_HUNG) { session->s_state = CEPH_MDS_SESSION_OPEN; - pr_info("mds%d came back\n", session->s_mds); + pr_info_client(cl, "mds%d came back\n", session->s_mds); } switch (op) { case CEPH_SESSION_OPEN: if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) - pr_info("mds%d reconnect success\n", session->s_mds); + pr_info_client(cl, "mds%d reconnect success\n", + session->s_mds); if (session->s_state == CEPH_MDS_SESSION_OPEN) { - pr_notice("mds%d is already opened\n", session->s_mds); + pr_notice_client(cl, "mds%d is already opened\n", + session->s_mds); } else { session->s_state = CEPH_MDS_SESSION_OPEN; session->s_features = features; @@ -4041,7 +4160,8 @@ static void handle_session(struct ceph_mds_session *session, case CEPH_SESSION_CLOSE: if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) - pr_info("mds%d reconnect denied\n", session->s_mds); + pr_info_client(cl, "mds%d reconnect denied\n", + session->s_mds); session->s_state = CEPH_MDS_SESSION_CLOSED; cleanup_session_requests(mdsc, session); remove_session_caps(session); @@ -4050,8 +4170,8 @@ static void handle_session(struct ceph_mds_session *session, break; case CEPH_SESSION_STALE: - pr_info("mds%d caps went stale, renewing\n", - session->s_mds); + pr_info_client(cl, "mds%d caps went stale, renewing\n", + session->s_mds); atomic_inc(&session->s_cap_gen); session->s_cap_ttl = jiffies - 1; send_renew_caps(mdsc, session); @@ -4072,7 +4192,7 @@ static void handle_session(struct ceph_mds_session *session, break; case CEPH_SESSION_FORCE_RO: - dout("force_session_readonly %p\n", session); + doutc(cl, "force_session_readonly %p\n", session); spin_lock(&session->s_cap_lock); session->s_readonly = true; spin_unlock(&session->s_cap_lock); @@ -4081,7 +4201,8 @@ static void handle_session(struct ceph_mds_session *session, case CEPH_SESSION_REJECT: WARN_ON(session->s_state != CEPH_MDS_SESSION_OPENING); - pr_info("mds%d rejected session\n", session->s_mds); + 
pr_info_client(cl, "mds%d rejected session\n", + session->s_mds); session->s_state = CEPH_MDS_SESSION_REJECTED; cleanup_session_requests(mdsc, session); remove_session_caps(session); @@ -4091,7 +4212,7 @@ static void handle_session(struct ceph_mds_session *session, break; default: - pr_err("mdsc_handle_session bad op %d mds%d\n", op, mds); + pr_err_client(cl, "bad op %d mds%d\n", op, mds); WARN_ON(1); } @@ -4108,30 +4229,32 @@ static void handle_session(struct ceph_mds_session *session, return; bad: - pr_err("mdsc_handle_session corrupt message mds%d len %d\n", mds, - (int)msg->front.iov_len); + pr_err_client(cl, "corrupt message mds%d len %d\n", mds, + (int)msg->front.iov_len); ceph_msg_dump(msg); return; } void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req) { + struct ceph_client *cl = req->r_mdsc->fsc->client; int dcaps; dcaps = xchg(&req->r_dir_caps, 0); if (dcaps) { - dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps)); + doutc(cl, "releasing r_dir_caps=%s\n", ceph_cap_string(dcaps)); ceph_put_cap_refs(ceph_inode(req->r_parent), dcaps); } } void ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req) { + struct ceph_client *cl = req->r_mdsc->fsc->client; int dcaps; dcaps = xchg(&req->r_dir_caps, 0); if (dcaps) { - dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps)); + doutc(cl, "releasing r_dir_caps=%s\n", ceph_cap_string(dcaps)); ceph_put_cap_refs_no_check_caps(ceph_inode(req->r_parent), dcaps); } @@ -4146,7 +4269,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc, struct ceph_mds_request *req, *nreq; struct rb_node *p; - dout("replay_unsafe_requests mds%d\n", session->s_mds); + doutc(mdsc->fsc->client, "mds%d\n", session->s_mds); mutex_lock(&mdsc->mutex); list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) @@ -4290,6 +4413,8 @@ out_unlock: */ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) { + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = ceph_inode_to_client(inode); union { struct ceph_mds_cap_reconnect v2; struct ceph_mds_cap_reconnect_v1 v1; @@ -4307,7 +4432,7 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) dentry = d_find_primary(inode); if (dentry) { /* set pathbase to parent dir when msg_version >= 2 */ - path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, + path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, recon_state->msg_version >= 2); dput(dentry); if (IS_ERR(path)) { @@ -4326,9 +4451,9 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) err = 0; goto out_err; } - dout(" adding %p ino %llx.%llx cap %p %lld %s\n", - inode, ceph_vinop(inode), cap, cap->cap_id, - ceph_cap_string(cap->issued)); + doutc(cl, " adding %p ino %llx.%llx cap %p %lld %s\n", inode, + ceph_vinop(inode), cap, cap->cap_id, + ceph_cap_string(cap->issued)); cap->seq = 0; /* reset cap seq */ cap->issue_seq = 0; /* and issue_seq */ @@ -4482,6 +4607,7 @@ static int encode_snap_realms(struct ceph_mds_client *mdsc, { struct rb_node *p; struct ceph_pagelist *pagelist = recon_state->pagelist; + struct ceph_client *cl = mdsc->fsc->client; int err = 0; if (recon_state->msg_version >= 4) { @@ -4520,8 +4646,8 @@ static int encode_snap_realms(struct ceph_mds_client *mdsc, ceph_pagelist_encode_32(pagelist, sizeof(sr_rec)); } - dout(" adding snap realm %llx seq %lld parent %llx\n", - realm->ino, realm->seq, realm->parent_ino); + doutc(cl, " adding snap realm %llx seq %lld parent %llx\n", + realm->ino, realm->seq, 
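The helpers touched here show the other half of the refactor: functions such as ceph_mdsc_build_path() and encode_snap_realms() (and, later in the patch, build_snap_context(), rebuild_snap_realms() and queue_realm_cap_snaps() in snap.c) gain a struct ceph_mds_client *mdsc parameter purely so they can reach a client for the new logging. A hypothetical helper showing the threading shape:

	/* hypothetical example of how the patch threads the client through */
	static void example_helper(struct ceph_mds_client *mdsc,
				   struct ceph_snap_realm *realm)
	{
		struct ceph_client *cl = mdsc->fsc->client;

		doutc(cl, "realm %llx\n", realm->ino);
	}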
realm->parent_ino); sr_rec.ino = cpu_to_le64(realm->ino); sr_rec.seq = cpu_to_le64(realm->seq); sr_rec.parent = cpu_to_le64(realm->parent_ino); @@ -4550,6 +4676,7 @@ fail: static void send_mds_reconnect(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_msg *reply; int mds = session->s_mds; int err = -ENOMEM; @@ -4558,7 +4685,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, }; LIST_HEAD(dispose); - pr_info("mds%d reconnect start\n", mds); + pr_info_client(cl, "mds%d reconnect start\n", mds); recon_state.pagelist = ceph_pagelist_alloc(GFP_NOFS); if (!recon_state.pagelist) @@ -4574,8 +4701,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, session->s_state = CEPH_MDS_SESSION_RECONNECTING; session->s_seq = 0; - dout("session %p state %s\n", session, - ceph_session_state_name(session->s_state)); + doutc(cl, "session %p state %s\n", session, + ceph_session_state_name(session->s_state)); atomic_inc(&session->s_cap_gen); @@ -4709,7 +4836,8 @@ fail: fail_nomsg: ceph_pagelist_release(recon_state.pagelist); fail_nopagelist: - pr_err("error %d preparing reconnect for mds%d\n", err, mds); + pr_err_client(cl, "error %d preparing reconnect for mds%d\n", + err, mds); return; } @@ -4728,9 +4856,9 @@ static void check_new_map(struct ceph_mds_client *mdsc, int oldstate, newstate; struct ceph_mds_session *s; unsigned long targets[DIV_ROUND_UP(CEPH_MAX_MDS, sizeof(unsigned long))] = {0}; + struct ceph_client *cl = mdsc->fsc->client; - dout("check_new_map new %u old %u\n", - newmap->m_epoch, oldmap->m_epoch); + doutc(cl, "new %u old %u\n", newmap->m_epoch, oldmap->m_epoch); if (newmap->m_info) { for (i = 0; i < newmap->possible_max_rank; i++) { @@ -4746,12 +4874,12 @@ static void check_new_map(struct ceph_mds_client *mdsc, oldstate = ceph_mdsmap_get_state(oldmap, i); newstate = ceph_mdsmap_get_state(newmap, i); - dout("check_new_map mds%d state %s%s -> %s%s (session %s)\n", - i, ceph_mds_state_name(oldstate), - ceph_mdsmap_is_laggy(oldmap, i) ? " (laggy)" : "", - ceph_mds_state_name(newstate), - ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "", - ceph_session_state_name(s->s_state)); + doutc(cl, "mds%d state %s%s -> %s%s (session %s)\n", + i, ceph_mds_state_name(oldstate), + ceph_mdsmap_is_laggy(oldmap, i) ? " (laggy)" : "", + ceph_mds_state_name(newstate), + ceph_mdsmap_is_laggy(newmap, i) ? 
" (laggy)" : "", + ceph_session_state_name(s->s_state)); if (i >= newmap->possible_max_rank) { /* force close session for stopped mds */ @@ -4804,7 +4932,8 @@ static void check_new_map(struct ceph_mds_client *mdsc, newstate >= CEPH_MDS_STATE_ACTIVE) { if (oldstate != CEPH_MDS_STATE_CREATING && oldstate != CEPH_MDS_STATE_STARTING) - pr_info("mds%d recovery completed\n", s->s_mds); + pr_info_client(cl, "mds%d recovery completed\n", + s->s_mds); kick_requests(mdsc, i); mutex_unlock(&mdsc->mutex); mutex_lock(&s->s_mutex); @@ -4848,12 +4977,13 @@ static void check_new_map(struct ceph_mds_client *mdsc, s = __open_export_target_session(mdsc, i); if (IS_ERR(s)) { err = PTR_ERR(s); - pr_err("failed to open export target session, err %d\n", - err); + pr_err_client(cl, + "failed to open export target session, err %d\n", + err); continue; } } - dout("send reconnect to export target mds.%d\n", i); + doutc(cl, "send reconnect to export target mds.%d\n", i); mutex_unlock(&mdsc->mutex); send_mds_reconnect(mdsc, s); ceph_put_mds_session(s); @@ -4869,8 +4999,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, if (s->s_state == CEPH_MDS_SESSION_OPEN || s->s_state == CEPH_MDS_SESSION_HUNG || s->s_state == CEPH_MDS_SESSION_CLOSING) { - dout(" connecting to export targets of laggy mds%d\n", - i); + doutc(cl, " connecting to export targets of laggy mds%d\n", i); __open_export_target_sessions(mdsc, s); } } @@ -4897,6 +5026,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, struct ceph_msg *msg) { + struct ceph_client *cl = mdsc->fsc->client; struct super_block *sb = mdsc->fsc->sb; struct inode *inode; struct dentry *parent, *dentry; @@ -4908,7 +5038,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, struct qstr dname; int release = 0; - dout("handle_lease from mds%d\n", mds); + doutc(cl, "from mds%d\n", mds); if (!ceph_inc_mds_stopping_blocker(mdsc, session)) return; @@ -4926,20 +5056,19 @@ static void handle_lease(struct ceph_mds_client *mdsc, /* lookup inode */ inode = ceph_find_inode(sb, vino); - dout("handle_lease %s, ino %llx %p %.*s\n", - ceph_lease_op_name(h->action), vino.ino, inode, - dname.len, dname.name); + doutc(cl, "%s, ino %llx %p %.*s\n", ceph_lease_op_name(h->action), + vino.ino, inode, dname.len, dname.name); mutex_lock(&session->s_mutex); if (!inode) { - dout("handle_lease no inode %llx\n", vino.ino); + doutc(cl, "no inode %llx\n", vino.ino); goto release; } /* dentry */ parent = d_find_alias(inode); if (!parent) { - dout("no parent dentry on inode %p\n", inode); + doutc(cl, "no parent dentry on inode %p\n", inode); WARN_ON(1); goto release; /* hrm... 
*/ } @@ -4999,7 +5128,7 @@ out: bad: ceph_dec_mds_stopping_blocker(mdsc); - pr_err("corrupt lease message\n"); + pr_err_client(cl, "corrupt lease message\n"); ceph_msg_dump(msg); } @@ -5007,13 +5136,14 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, struct dentry *dentry, char action, u32 seq) { + struct ceph_client *cl = session->s_mdsc->fsc->client; struct ceph_msg *msg; struct ceph_mds_lease *lease; struct inode *dir; int len = sizeof(*lease) + sizeof(u32) + NAME_MAX; - dout("lease_send_msg identry %p %s to mds%d\n", - dentry, ceph_lease_op_name(action), session->s_mds); + doutc(cl, "identry %p %s to mds%d\n", dentry, ceph_lease_op_name(action), + session->s_mds); msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, GFP_NOFS, false); if (!msg) @@ -5046,6 +5176,7 @@ static void lock_unlock_session(struct ceph_mds_session *s) static void maybe_recover_session(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_fs_client *fsc = mdsc->fsc; if (!ceph_test_mount_opt(fsc, CLEANRECOVER)) @@ -5057,17 +5188,19 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc) if (!READ_ONCE(fsc->blocklisted)) return; - pr_info("auto reconnect after blocklisted\n"); + pr_info_client(cl, "auto reconnect after blocklisted\n"); ceph_force_reconnect(fsc->sb); } bool check_session_state(struct ceph_mds_session *s) { + struct ceph_client *cl = s->s_mdsc->fsc->client; + switch (s->s_state) { case CEPH_MDS_SESSION_OPEN: if (s->s_ttl && time_after(jiffies, s->s_ttl)) { s->s_state = CEPH_MDS_SESSION_HUNG; - pr_info("mds%d hung\n", s->s_mds); + pr_info_client(cl, "mds%d hung\n", s->s_mds); } break; case CEPH_MDS_SESSION_CLOSING: @@ -5087,6 +5220,8 @@ bool check_session_state(struct ceph_mds_session *s) */ void inc_session_sequence(struct ceph_mds_session *s) { + struct ceph_client *cl = s->s_mdsc->fsc->client; + lockdep_assert_held(&s->s_mutex); s->s_seq++; @@ -5094,11 +5229,11 @@ void inc_session_sequence(struct ceph_mds_session *s) if (s->s_state == CEPH_MDS_SESSION_CLOSING) { int ret; - dout("resending session close request for mds%d\n", s->s_mds); + doutc(cl, "resending session close request for mds%d\n", s->s_mds); ret = request_close_session(s); if (ret < 0) - pr_err("unable to close session to mds%d: %d\n", - s->s_mds, ret); + pr_err_client(cl, "unable to close session to mds%d: %d\n", + s->s_mds, ret); } } @@ -5127,7 +5262,7 @@ static void delayed_work(struct work_struct *work) int renew_caps; int i; - dout("mdsc delayed_work\n"); + doutc(mdsc->fsc->client, "mdsc delayed_work\n"); if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED) return; @@ -5256,6 +5391,7 @@ err_mdsc: */ static void wait_requests(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_options *opts = mdsc->fsc->client->options; struct ceph_mds_request *req; @@ -5263,25 +5399,25 @@ static void wait_requests(struct ceph_mds_client *mdsc) if (__get_oldest_req(mdsc)) { mutex_unlock(&mdsc->mutex); - dout("wait_requests waiting for requests\n"); + doutc(cl, "waiting for requests\n"); wait_for_completion_timeout(&mdsc->safe_umount_waiters, ceph_timeout_jiffies(opts->mount_timeout)); /* tear down remaining requests */ mutex_lock(&mdsc->mutex); while ((req = __get_oldest_req(mdsc))) { - dout("wait_requests timed out on tid %llu\n", - req->r_tid); + doutc(cl, "timed out on tid %llu\n", req->r_tid); list_del_init(&req->r_wait); __unregister_request(mdsc, req); } } mutex_unlock(&mdsc->mutex); - dout("wait_requests done\n"); + doutc(cl, "done\n"); } void 
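inc_session_sequence() above now documents its locking contract with lockdep_assert_held(&s->s_mutex): the sequence bump, and the close-request resend it may trigger, must be serialized with other session-state changes. Callers are therefore expected to follow a pattern like this sketch (lock and helper names are the real ones from this file):

	mutex_lock(&session->s_mutex);
	inc_session_sequence(session);	/* may resend a session close request */
	mutex_unlock(&session->s_mutex);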
send_flush_mdlog(struct ceph_mds_session *s) { + struct ceph_client *cl = s->s_mdsc->fsc->client; struct ceph_msg *msg; /* @@ -5291,13 +5427,13 @@ void send_flush_mdlog(struct ceph_mds_session *s) return; mutex_lock(&s->s_mutex); - dout("request mdlog flush to mds%d (%s)s seq %lld\n", s->s_mds, - ceph_session_state_name(s->s_state), s->s_seq); + doutc(cl, "request mdlog flush to mds%d (%s)s seq %lld\n", + s->s_mds, ceph_session_state_name(s->s_state), s->s_seq); msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_FLUSH_MDLOG, s->s_seq); if (!msg) { - pr_err("failed to request mdlog flush to mds%d (%s) seq %lld\n", - s->s_mds, ceph_session_state_name(s->s_state), s->s_seq); + pr_err_client(cl, "failed to request mdlog flush to mds%d (%s) seq %lld\n", + s->s_mds, ceph_session_state_name(s->s_state), s->s_seq); } else { ceph_con_send(&s->s_con, msg); } @@ -5310,7 +5446,7 @@ void send_flush_mdlog(struct ceph_mds_session *s) */ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) { - dout("pre_umount\n"); + doutc(mdsc->fsc->client, "begin\n"); mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN; ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true); @@ -5325,6 +5461,7 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) ceph_msgr_flush(); ceph_cleanup_quotarealms_inodes(mdsc); + doutc(mdsc->fsc->client, "done\n"); } /* @@ -5333,12 +5470,13 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) static void flush_mdlog_and_wait_mdsc_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req = NULL, *nextreq; struct ceph_mds_session *last_session = NULL; struct rb_node *n; mutex_lock(&mdsc->mutex); - dout("%s want %lld\n", __func__, want_tid); + doutc(cl, "want %lld\n", want_tid); restart: req = __get_oldest_req(mdsc); while (req && req->r_tid <= want_tid) { @@ -5372,8 +5510,8 @@ restart: } else { ceph_put_mds_session(s); } - dout("%s wait on %llu (want %llu)\n", __func__, - req->r_tid, want_tid); + doutc(cl, "wait on %llu (want %llu)\n", + req->r_tid, want_tid); wait_for_completion(&req->r_safe_completion); mutex_lock(&mdsc->mutex); @@ -5391,17 +5529,18 @@ restart: } mutex_unlock(&mdsc->mutex); ceph_put_mds_session(last_session); - dout("%s done\n", __func__); + doutc(cl, "done\n"); } void ceph_mdsc_sync(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; u64 want_tid, want_flush; if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) return; - dout("sync\n"); + doutc(cl, "sync\n"); mutex_lock(&mdsc->mutex); want_tid = mdsc->last_tid; mutex_unlock(&mdsc->mutex); @@ -5417,8 +5556,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) } spin_unlock(&mdsc->cap_dirty_lock); - dout("sync want tid %lld flush_seq %lld\n", - want_tid, want_flush); + doutc(cl, "sync want tid %lld flush_seq %lld\n", want_tid, want_flush); flush_mdlog_and_wait_mdsc_unsafe_requests(mdsc, want_tid); wait_caps_flush(mdsc, want_flush); @@ -5440,11 +5578,12 @@ static bool done_closing_sessions(struct ceph_mds_client *mdsc, int skipped) void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) { struct ceph_options *opts = mdsc->fsc->client->options; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_session *session; int i; int skipped = 0; - dout("close_sessions\n"); + doutc(cl, "begin\n"); /* close sessions */ mutex_lock(&mdsc->mutex); @@ -5462,7 +5601,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) } mutex_unlock(&mdsc->mutex); - dout("waiting for sessions to close\n"); + doutc(cl, 
"waiting for sessions to close\n"); wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc, skipped), ceph_timeout_jiffies(opts->mount_timeout)); @@ -5490,7 +5629,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) cancel_work_sync(&mdsc->cap_reclaim_work); cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ - dout("stopped\n"); + doutc(cl, "done\n"); } void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc) @@ -5498,7 +5637,7 @@ void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc) struct ceph_mds_session *session; int mds; - dout("force umount\n"); + doutc(mdsc->fsc->client, "force umount\n"); mutex_lock(&mdsc->mutex); for (mds = 0; mds < mdsc->max_sessions; mds++) { @@ -5529,7 +5668,7 @@ void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc) static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) { - dout("stop\n"); + doutc(mdsc->fsc->client, "stop\n"); /* * Make sure the delayed work stopped before releasing * the resources. @@ -5550,7 +5689,7 @@ static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) void ceph_mdsc_destroy(struct ceph_fs_client *fsc) { struct ceph_mds_client *mdsc = fsc->mdsc; - dout("mdsc_destroy %p\n", mdsc); + doutc(fsc->client, "%p\n", mdsc); if (!mdsc) return; @@ -5564,12 +5703,13 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc) fsc->mdsc = NULL; kfree(mdsc); - dout("mdsc_destroy %p done\n", mdsc); + doutc(fsc->client, "%p done\n", mdsc); } void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) { struct ceph_fs_client *fsc = mdsc->fsc; + struct ceph_client *cl = fsc->client; const char *mds_namespace = fsc->mount_options->mds_namespace; void *p = msg->front.iov_base; void *end = p + msg->front.iov_len; @@ -5581,7 +5721,7 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) ceph_decode_need(&p, end, sizeof(u32), bad); epoch = ceph_decode_32(&p); - dout("handle_fsmap epoch %u\n", epoch); + doutc(cl, "epoch %u\n", epoch); /* struct_v, struct_cv, map_len, epoch, legacy_client_fscid */ ceph_decode_skip_n(&p, end, 2 + sizeof(u32) * 3, bad); @@ -5626,7 +5766,8 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) return; bad: - pr_err("error decoding fsmap %d. Shutting down mount.\n", err); + pr_err_client(cl, "error decoding fsmap %d. Shutting down mount.\n", + err); ceph_umount_begin(mdsc->fsc->sb); ceph_msg_dump(msg); err_out: @@ -5641,6 +5782,7 @@ err_out: */ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) { + struct ceph_client *cl = mdsc->fsc->client; u32 epoch; u32 maplen; void *p = msg->front.iov_base; @@ -5655,18 +5797,17 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) return; epoch = ceph_decode_32(&p); maplen = ceph_decode_32(&p); - dout("handle_map epoch %u len %d\n", epoch, (int)maplen); + doutc(cl, "epoch %u len %d\n", epoch, (int)maplen); /* do we need it? 
*/ mutex_lock(&mdsc->mutex); if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) { - dout("handle_map epoch %u <= our %u\n", - epoch, mdsc->mdsmap->m_epoch); + doutc(cl, "epoch %u <= our %u\n", epoch, mdsc->mdsmap->m_epoch); mutex_unlock(&mdsc->mutex); return; } - newmap = ceph_mdsmap_decode(&p, end, ceph_msgr2(mdsc->fsc->client)); + newmap = ceph_mdsmap_decode(mdsc, &p, end, ceph_msgr2(mdsc->fsc->client)); if (IS_ERR(newmap)) { err = PTR_ERR(newmap); goto bad_unlock; @@ -5695,7 +5836,8 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) bad_unlock: mutex_unlock(&mdsc->mutex); bad: - pr_err("error decoding mdsmap %d. Shutting down mount.\n", err); + pr_err_client(cl, "error decoding mdsmap %d. Shutting down mount.\n", + err); ceph_umount_begin(mdsc->fsc->sb); ceph_msg_dump(msg); return; @@ -5726,7 +5868,8 @@ static void mds_peer_reset(struct ceph_connection *con) struct ceph_mds_session *s = con->private; struct ceph_mds_client *mdsc = s->s_mdsc; - pr_warn("mds%d closed our session\n", s->s_mds); + pr_warn_client(mdsc->fsc->client, "mds%d closed our session\n", + s->s_mds); if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO) send_mds_reconnect(mdsc, s); } @@ -5735,6 +5878,7 @@ static void mds_dispatch(struct ceph_connection *con, struct ceph_msg *msg) { struct ceph_mds_session *s = con->private; struct ceph_mds_client *mdsc = s->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; int type = le16_to_cpu(msg->hdr.type); mutex_lock(&mdsc->mutex); @@ -5774,8 +5918,8 @@ static void mds_dispatch(struct ceph_connection *con, struct ceph_msg *msg) break; default: - pr_err("received unknown message type %d %s\n", type, - ceph_msg_type_name(type)); + pr_err_client(cl, "received unknown message type %d %s\n", + type, ceph_msg_type_name(type)); } out: ceph_msg_put(msg); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 5a3714bdd64a..2e6ddaa13d72 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -14,9 +14,9 @@ #include <linux/ceph/types.h> #include <linux/ceph/messenger.h> -#include <linux/ceph/mdsmap.h> #include <linux/ceph/auth.h> +#include "mdsmap.h" #include "metric.h" #include "super.h" @@ -33,8 +33,10 @@ enum ceph_feature_type { CEPHFS_FEATURE_NOTIFY_SESSION_STATE, CEPHFS_FEATURE_OP_GETVXATTR, CEPHFS_FEATURE_32BITS_RETRY_FWD, + CEPHFS_FEATURE_NEW_SNAPREALM_INFO, + CEPHFS_FEATURE_HAS_OWNER_UIDGID, - CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_32BITS_RETRY_FWD, + CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_HAS_OWNER_UIDGID, }; #define CEPHFS_FEATURES_CLIENT_SUPPORTED { \ @@ -49,6 +51,7 @@ enum ceph_feature_type { CEPHFS_FEATURE_NOTIFY_SESSION_STATE, \ CEPHFS_FEATURE_OP_GETVXATTR, \ CEPHFS_FEATURE_32BITS_RETRY_FWD, \ + CEPHFS_FEATURE_HAS_OWNER_UIDGID, \ } /* @@ -300,6 +303,7 @@ struct ceph_mds_request { int r_fmode; /* file mode, if expecting cap */ int r_request_release_offset; const struct cred *r_cred; + struct mnt_idmap *r_mnt_idmap; struct timespec64 r_stamp; /* for choosing which mds to send this request to */ @@ -581,7 +585,8 @@ static inline void ceph_mdsc_free_path(char *path, int len) __putname(path - (PATH_MAX - 1 - len)); } -extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, +extern char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, + struct dentry *dentry, int *plen, u64 *base, int for_wire); extern void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry); @@ -614,4 +619,6 @@ static inline int ceph_wait_on_async_create(struct inode *inode) extern int ceph_wait_on_conflict_unlink(struct dentry 
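Two of the mds_client.h additions in this hunk belong together: the CEPHFS_FEATURE_HAS_OWNER_UIDGID session feature bit and the struct mnt_idmap *r_mnt_idmap request field, with the enable_unsafe_idmap module knob (declared just below and defined in super.c) as the escape hatch. A sketch of the negotiation they imply; the wrapper function is hypothetical, the identifiers it uses are from this header:

	/*
	 * Hypothetical check: idmapped mounts want an MDS that accepts
	 * separate owner uid/gid fields; without the feature bit, only
	 * the enable_unsafe_idmap parameter lets the mount proceed.
	 */
	static bool example_idmap_allowed(struct ceph_mds_session *s)
	{
		return test_bit(CEPHFS_FEATURE_HAS_OWNER_UIDGID, &s->s_features) ||
		       enable_unsafe_idmap;
	}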
*dentry); extern u64 ceph_get_deleg_ino(struct ceph_mds_session *session); extern int ceph_restore_deleg_ino(struct ceph_mds_session *session, u64 ino); + +extern bool enable_unsafe_idmap; #endif diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 7dac21ee6ce7..fae97c25ce58 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -7,10 +7,11 @@ #include <linux/slab.h> #include <linux/types.h> -#include <linux/ceph/mdsmap.h> #include <linux/ceph/messenger.h> #include <linux/ceph/decode.h> +#include "mdsmap.h" +#include "mds_client.h" #include "super.h" #define CEPH_MDS_IS_READY(i, ignore_laggy) \ @@ -114,8 +115,10 @@ bad: * Ignore any fields we don't care about (there are quite a few of * them). */ -struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2) +struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc, void **p, + void *end, bool msgr2) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mdsmap *m; const void *start = *p; int i, j, n; @@ -233,20 +236,18 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2) *p = info_end; } - dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s%s\n", - i+1, n, global_id, mds, inc, - ceph_pr_addr(&addr), - ceph_mds_state_name(state), - laggy ? "(laggy)" : ""); + doutc(cl, "%d/%d %lld mds%d.%d %s %s%s\n", i+1, n, global_id, + mds, inc, ceph_pr_addr(&addr), + ceph_mds_state_name(state), laggy ? "(laggy)" : ""); if (mds < 0 || mds >= m->possible_max_rank) { - pr_warn("mdsmap_decode got incorrect mds(%d)\n", mds); + pr_warn_client(cl, "got incorrect mds(%d)\n", mds); continue; } if (state <= 0) { - dout("mdsmap_decode got incorrect state(%s)\n", - ceph_mds_state_name(state)); + doutc(cl, "got incorrect state(%s)\n", + ceph_mds_state_name(state)); continue; } @@ -385,16 +386,16 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2) m->m_max_xattr_size = 0; } bad_ext: - dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n", - !!m->m_enabled, !!m->m_damaged, m->m_num_laggy); + doutc(cl, "m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n", + !!m->m_enabled, !!m->m_damaged, m->m_num_laggy); *p = end; - dout("mdsmap_decode success epoch %u\n", m->m_epoch); + doutc(cl, "success epoch %u\n", m->m_epoch); return m; nomem: err = -ENOMEM; goto out_err; corrupt: - pr_err("corrupt mdsmap\n"); + pr_err_client(cl, "corrupt mdsmap\n"); print_hex_dump(KERN_DEBUG, "mdsmap: ", DUMP_PREFIX_OFFSET, 16, 1, start, end - start, true); diff --git a/include/linux/ceph/mdsmap.h b/fs/ceph/mdsmap.h index 4c3e0648dc27..89f1931f1ba6 100644 --- a/include/linux/ceph/mdsmap.h +++ b/fs/ceph/mdsmap.h @@ -5,6 +5,8 @@ #include <linux/bug.h> #include <linux/ceph/types.h> +struct ceph_mds_client; + /* * mds map - describe servers in the mds cluster. 
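Moving mdsmap.h out of include/linux/ceph/ into fs/ceph/ makes the map decoder private to the filesystem: ceph_mdsmap_decode() now takes a struct ceph_mds_client * for the client-aware logging, a type the shared libceph headers should not know about. As the hunk below shows, a forward declaration keeps the header self-contained without pulling in mds_client.h:

	struct ceph_mds_client;		/* only the type name is needed */

	struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc,
					       void **p, void *end, bool msgr2);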
* @@ -65,7 +67,8 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) } extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); -struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2); +struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc, void **p, + void *end, bool msgr2); extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m); diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c index 6d3584f16f9a..871c1090e520 100644 --- a/fs/ceph/metric.c +++ b/fs/ceph/metric.c @@ -31,6 +31,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, struct ceph_client_metric *m = &mdsc->metric; u64 nr_caps = atomic64_read(&m->total_caps); u32 header_len = sizeof(struct ceph_metric_header); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_msg *msg; s64 sum; s32 items = 0; @@ -51,8 +52,8 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true); if (!msg) { - pr_err("send metrics to mds%d, failed to allocate message\n", - s->s_mds); + pr_err_client(cl, "to mds%d, failed to allocate message\n", + s->s_mds); return false; } diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index f7fcf7f08ec6..9d36c3532de1 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -43,6 +43,7 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc, { struct super_block *sb = mdsc->fsc->sb; struct ceph_mds_quota *h = msg->front.iov_base; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_vino vino; struct inode *inode; struct ceph_inode_info *ci; @@ -51,8 +52,8 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc, return; if (msg->front.iov_len < sizeof(*h)) { - pr_err("%s corrupt message mds%d len %d\n", __func__, - session->s_mds, (int)msg->front.iov_len); + pr_err_client(cl, "corrupt message mds%d len %d\n", + session->s_mds, (int)msg->front.iov_len); ceph_msg_dump(msg); goto out; } @@ -62,7 +63,7 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc, vino.snap = CEPH_NOSNAP; inode = ceph_find_inode(sb, vino); if (!inode) { - pr_warn("Failed to find inode %llu\n", vino.ino); + pr_warn_client(cl, "failed to find inode %llx\n", vino.ino); goto out; } ci = ceph_inode(inode); @@ -85,6 +86,7 @@ find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino) { struct ceph_quotarealm_inode *qri = NULL; struct rb_node **node, *parent = NULL; + struct ceph_client *cl = mdsc->fsc->client; mutex_lock(&mdsc->quotarealms_inodes_mutex); node = &(mdsc->quotarealms_inodes.rb_node); @@ -110,7 +112,7 @@ find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino) rb_link_node(&qri->node, parent, node); rb_insert_color(&qri->node, &mdsc->quotarealms_inodes); } else - pr_warn("Failed to alloc quotarealms_inode\n"); + pr_warn_client(cl, "Failed to alloc quotarealms_inode\n"); } mutex_unlock(&mdsc->quotarealms_inodes_mutex); @@ -129,6 +131,7 @@ static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc, struct super_block *sb, struct ceph_snap_realm *realm) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_quotarealm_inode *qri; struct inode *in; @@ -161,8 +164,8 @@ static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc, } if (IS_ERR(in)) { - dout("Can't lookup inode %llx (err: %ld)\n", - realm->ino, PTR_ERR(in)); + doutc(cl, "Can't lookup inode %llx (err: %ld)\n", realm->ino, + PTR_ERR(in)); qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */ } else { qri->timeout = 0; @@ 
-213,6 +216,7 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, enum quota_get_realm which_quota, bool retry) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci = NULL; struct ceph_snap_realm *realm, *next; struct inode *in; @@ -226,8 +230,9 @@ restart: if (realm) ceph_get_snap_realm(mdsc, realm); else - pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) " - "null i_snap_realm\n", ceph_vinop(inode)); + pr_err_ratelimited_client(cl, + "%p %llx.%llx null i_snap_realm\n", + inode, ceph_vinop(inode)); while (realm) { bool has_inode; @@ -317,6 +322,7 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, loff_t delta) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci; struct ceph_snap_realm *realm, *next; struct inode *in; @@ -332,8 +338,9 @@ restart: if (realm) ceph_get_snap_realm(mdsc, realm); else - pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) " - "null i_snap_realm\n", ceph_vinop(inode)); + pr_err_ratelimited_client(cl, + "%p %llx.%llx null i_snap_realm\n", + inode, ceph_vinop(inode)); while (realm) { bool has_inode; @@ -383,7 +390,7 @@ restart: break; default: /* Shouldn't happen */ - pr_warn("Invalid quota check op (%d)\n", op); + pr_warn_client(cl, "Invalid quota check op (%d)\n", op); exceeded = true; /* Just break the loop */ } iput(in); diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 6732e1ea97d9..c65f2b202b2b 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -138,7 +138,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm( __insert_snap_realm(&mdsc->snap_realms, realm); mdsc->num_snap_realms++; - dout("%s %llx %p\n", __func__, realm->ino, realm); + doutc(mdsc->fsc->client, "%llx %p\n", realm->ino, realm); return realm; } @@ -150,6 +150,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm( static struct ceph_snap_realm *__lookup_snap_realm(struct ceph_mds_client *mdsc, u64 ino) { + struct ceph_client *cl = mdsc->fsc->client; struct rb_node *n = mdsc->snap_realms.rb_node; struct ceph_snap_realm *r; @@ -162,7 +163,7 @@ static struct ceph_snap_realm *__lookup_snap_realm(struct ceph_mds_client *mdsc, else if (ino > r->ino) n = n->rb_right; else { - dout("%s %llx %p\n", __func__, r->ino, r); + doutc(cl, "%llx %p\n", r->ino, r); return r; } } @@ -188,9 +189,10 @@ static void __put_snap_realm(struct ceph_mds_client *mdsc, static void __destroy_snap_realm(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm) { + struct ceph_client *cl = mdsc->fsc->client; lockdep_assert_held_write(&mdsc->snap_rwsem); - dout("%s %p %llx\n", __func__, realm, realm->ino); + doutc(cl, "%p %llx\n", realm, realm->ino); rb_erase(&realm->node, &mdsc->snap_realms); mdsc->num_snap_realms--; @@ -290,6 +292,7 @@ static int adjust_snap_realm_parent(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm, u64 parentino) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_snap_realm *parent; lockdep_assert_held_write(&mdsc->snap_rwsem); @@ -303,8 +306,8 @@ static int adjust_snap_realm_parent(struct ceph_mds_client *mdsc, if (IS_ERR(parent)) return PTR_ERR(parent); } - dout("%s %llx %p: %llx %p -> %llx %p\n", __func__, realm->ino, - realm, realm->parent_ino, realm->parent, parentino, parent); + doutc(cl, "%llx %p: %llx %p -> %llx %p\n", realm->ino, realm, + realm->parent_ino, realm->parent, parentino, parent); if (realm->parent) { list_del_init(&realm->child_item); ceph_put_snap_realm(mdsc, realm->parent); @@ 
-329,10 +332,12 @@ static int cmpu64_rev(const void *a, const void *b) /* * build the snap context for a given realm. */ -static int build_snap_context(struct ceph_snap_realm *realm, +static int build_snap_context(struct ceph_mds_client *mdsc, + struct ceph_snap_realm *realm, struct list_head *realm_queue, struct list_head *dirty_realms) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_snap_realm *parent = realm->parent; struct ceph_snap_context *snapc; int err = 0; @@ -360,10 +365,10 @@ static int build_snap_context(struct ceph_snap_realm *realm, realm->cached_context->seq == realm->seq && (!parent || realm->cached_context->seq >= parent->cached_context->seq)) { - dout("%s %llx %p: %p seq %lld (%u snaps) (unchanged)\n", - __func__, realm->ino, realm, realm->cached_context, - realm->cached_context->seq, - (unsigned int)realm->cached_context->num_snaps); + doutc(cl, "%llx %p: %p seq %lld (%u snaps) (unchanged)\n", + realm->ino, realm, realm->cached_context, + realm->cached_context->seq, + (unsigned int)realm->cached_context->num_snaps); return 0; } @@ -400,8 +405,8 @@ static int build_snap_context(struct ceph_snap_realm *realm, sort(snapc->snaps, num, sizeof(u64), cmpu64_rev, NULL); snapc->num_snaps = num; - dout("%s %llx %p: %p seq %lld (%u snaps)\n", __func__, realm->ino, - realm, snapc, snapc->seq, (unsigned int) snapc->num_snaps); + doutc(cl, "%llx %p: %p seq %lld (%u snaps)\n", realm->ino, realm, + snapc, snapc->seq, (unsigned int) snapc->num_snaps); ceph_put_snap_context(realm->cached_context); realm->cached_context = snapc; @@ -418,16 +423,18 @@ fail: ceph_put_snap_context(realm->cached_context); realm->cached_context = NULL; } - pr_err("%s %llx %p fail %d\n", __func__, realm->ino, realm, err); + pr_err_client(cl, "%llx %p fail %d\n", realm->ino, realm, err); return err; } /* * rebuild snap context for the given realm and all of its children. */ -static void rebuild_snap_realms(struct ceph_snap_realm *realm, +static void rebuild_snap_realms(struct ceph_mds_client *mdsc, + struct ceph_snap_realm *realm, struct list_head *dirty_realms) { + struct ceph_client *cl = mdsc->fsc->client; LIST_HEAD(realm_queue); int last = 0; bool skip = false; @@ -451,9 +458,10 @@ static void rebuild_snap_realms(struct ceph_snap_realm *realm, continue; } - last = build_snap_context(_realm, &realm_queue, dirty_realms); - dout("%s %llx %p, %s\n", __func__, _realm->ino, _realm, - last > 0 ? "is deferred" : !last ? "succeeded" : "failed"); + last = build_snap_context(mdsc, _realm, &realm_queue, + dirty_realms); + doutc(cl, "%llx %p, %s\n", realm->ino, realm, + last > 0 ? "is deferred" : !last ? "succeeded" : "failed"); /* is any child in the list ? */ list_for_each_entry(child, &_realm->children, child_item) { @@ -523,6 +531,7 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci, struct ceph_cap_snap **pcapsnap) { struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_snap_context *old_snapc, *new_snapc; struct ceph_cap_snap *capsnap = *pcapsnap; struct ceph_buffer *old_blob = NULL; @@ -548,14 +557,14 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci, as no new writes are allowed to start when pending, so any writes in progress now were started before the previous cap_snap. lucky us. 
*/ - dout("%s %p %llx.%llx already pending\n", - __func__, inode, ceph_vinop(inode)); + doutc(cl, "%p %llx.%llx already pending\n", inode, + ceph_vinop(inode)); goto update_snapc; } if (ci->i_wrbuffer_ref_head == 0 && !(dirty & (CEPH_CAP_ANY_EXCL|CEPH_CAP_FILE_WR))) { - dout("%s %p %llx.%llx nothing dirty|writing\n", - __func__, inode, ceph_vinop(inode)); + doutc(cl, "%p %llx.%llx nothing dirty|writing\n", inode, + ceph_vinop(inode)); goto update_snapc; } @@ -575,15 +584,15 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci, } else { if (!(used & CEPH_CAP_FILE_WR) && ci->i_wrbuffer_ref_head == 0) { - dout("%s %p %llx.%llx no new_snap|dirty_page|writing\n", - __func__, inode, ceph_vinop(inode)); + doutc(cl, "%p %llx.%llx no new_snap|dirty_page|writing\n", + inode, ceph_vinop(inode)); goto update_snapc; } } - dout("%s %p %llx.%llx cap_snap %p queuing under %p %s %s\n", - __func__, inode, ceph_vinop(inode), capsnap, old_snapc, - ceph_cap_string(dirty), capsnap->need_flush ? "" : "no_flush"); + doutc(cl, "%p %llx.%llx cap_snap %p queuing under %p %s %s\n", + inode, ceph_vinop(inode), capsnap, old_snapc, + ceph_cap_string(dirty), capsnap->need_flush ? "" : "no_flush"); ihold(inode); capsnap->follows = old_snapc->seq; @@ -615,9 +624,9 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci, list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); if (used & CEPH_CAP_FILE_WR) { - dout("%s %p %llx.%llx cap_snap %p snapc %p seq %llu used WR," - " now pending\n", __func__, inode, ceph_vinop(inode), - capsnap, old_snapc, old_snapc->seq); + doutc(cl, "%p %llx.%llx cap_snap %p snapc %p seq %llu used WR," + " now pending\n", inode, ceph_vinop(inode), capsnap, + old_snapc, old_snapc->seq); capsnap->writing = 1; } else { /* note mtime, size NOW. */ @@ -634,7 +643,7 @@ update_snapc: ci->i_head_snapc = NULL; } else { ci->i_head_snapc = ceph_get_snap_context(new_snapc); - dout(" new snapc is %p\n", new_snapc); + doutc(cl, " new snapc is %p\n", new_snapc); } spin_unlock(&ci->i_ceph_lock); @@ -655,6 +664,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, { struct inode *inode = &ci->netfs.inode; struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = mdsc->fsc->client; BUG_ON(capsnap->writing); capsnap->size = i_size_read(inode); @@ -667,11 +677,12 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, capsnap->truncate_size = ci->i_truncate_size; capsnap->truncate_seq = ci->i_truncate_seq; if (capsnap->dirty_pages) { - dout("%s %p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu " - "still has %d dirty pages\n", __func__, inode, - ceph_vinop(inode), capsnap, capsnap->context, - capsnap->context->seq, ceph_cap_string(capsnap->dirty), - capsnap->size, capsnap->dirty_pages); + doutc(cl, "%p %llx.%llx cap_snap %p snapc %p %llu %s " + "s=%llu still has %d dirty pages\n", inode, + ceph_vinop(inode), capsnap, capsnap->context, + capsnap->context->seq, + ceph_cap_string(capsnap->dirty), + capsnap->size, capsnap->dirty_pages); return 0; } @@ -680,20 +691,20 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, * And trigger to flush the buffer immediately. 
*/ if (ci->i_wrbuffer_ref) { - dout("%s %p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu " - "used WRBUFFER, delaying\n", __func__, inode, - ceph_vinop(inode), capsnap, capsnap->context, - capsnap->context->seq, ceph_cap_string(capsnap->dirty), - capsnap->size); + doutc(cl, "%p %llx.%llx cap_snap %p snapc %p %llu %s " + "s=%llu used WRBUFFER, delaying\n", inode, + ceph_vinop(inode), capsnap, capsnap->context, + capsnap->context->seq, ceph_cap_string(capsnap->dirty), + capsnap->size); ceph_queue_writeback(inode); return 0; } ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS; - dout("%s %p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu\n", - __func__, inode, ceph_vinop(inode), capsnap, capsnap->context, - capsnap->context->seq, ceph_cap_string(capsnap->dirty), - capsnap->size); + doutc(cl, "%p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu\n", + inode, ceph_vinop(inode), capsnap, capsnap->context, + capsnap->context->seq, ceph_cap_string(capsnap->dirty), + capsnap->size); spin_lock(&mdsc->snap_flush_lock); if (list_empty(&ci->i_snap_flush_item)) { @@ -708,13 +719,15 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, * Queue cap_snaps for snap writeback for this realm and its children. * Called under snap_rwsem, so realm topology won't change. */ -static void queue_realm_cap_snaps(struct ceph_snap_realm *realm) +static void queue_realm_cap_snaps(struct ceph_mds_client *mdsc, + struct ceph_snap_realm *realm) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci; struct inode *lastinode = NULL; struct ceph_cap_snap *capsnap = NULL; - dout("%s %p %llx inode\n", __func__, realm, realm->ino); + doutc(cl, "%p %llx inode\n", realm, realm->ino); spin_lock(&realm->inodes_with_caps_lock); list_for_each_entry(ci, &realm->inodes_with_caps, i_snap_realm_item) { @@ -733,8 +746,9 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm) if (!capsnap) { capsnap = kmem_cache_zalloc(ceph_cap_snap_cachep, GFP_NOFS); if (!capsnap) { - pr_err("ENOMEM allocating ceph_cap_snap on %p\n", - inode); + pr_err_client(cl, + "ENOMEM allocating ceph_cap_snap on %p\n", + inode); return; } } @@ -752,7 +766,7 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm) if (capsnap) kmem_cache_free(ceph_cap_snap_cachep, capsnap); - dout("%s %p %llx done\n", __func__, realm, realm->ino); + doutc(cl, "%p %llx done\n", realm, realm->ino); } /* @@ -766,6 +780,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, void *p, void *e, bool deletion, struct ceph_snap_realm **realm_ret) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_snap_realm *ri; /* encoded */ __le64 *snaps; /* encoded */ __le64 *prior_parent_snaps; /* encoded */ @@ -780,7 +795,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, lockdep_assert_held_write(&mdsc->snap_rwsem); - dout("%s deletion=%d\n", __func__, deletion); + doutc(cl, "deletion=%d\n", deletion); more: realm = NULL; rebuild_snapcs = 0; @@ -810,8 +825,8 @@ more: rebuild_snapcs += err; if (le64_to_cpu(ri->seq) > realm->seq) { - dout("%s updating %llx %p %lld -> %lld\n", __func__, - realm->ino, realm, realm->seq, le64_to_cpu(ri->seq)); + doutc(cl, "updating %llx %p %lld -> %lld\n", realm->ino, + realm, realm->seq, le64_to_cpu(ri->seq)); /* update realm parameters, snap lists */ realm->seq = le64_to_cpu(ri->seq); realm->created = le64_to_cpu(ri->created); @@ -834,16 +849,16 @@ more: rebuild_snapcs = 1; } else if (!realm->cached_context) { - dout("%s %llx %p seq %lld new\n", __func__, - realm->ino, realm, realm->seq); + doutc(cl, "%llx %p seq 
%lld new\n", realm->ino, realm, + realm->seq); rebuild_snapcs = 1; } else { - dout("%s %llx %p seq %lld unchanged\n", __func__, - realm->ino, realm, realm->seq); + doutc(cl, "%llx %p seq %lld unchanged\n", realm->ino, realm, + realm->seq); } - dout("done with %llx %p, rebuild_snapcs=%d, %p %p\n", realm->ino, - realm, rebuild_snapcs, p, e); + doutc(cl, "done with %llx %p, rebuild_snapcs=%d, %p %p\n", realm->ino, + realm, rebuild_snapcs, p, e); /* * this will always track the uppest parent realm from which @@ -855,7 +870,7 @@ more: /* rebuild_snapcs when we reach the _end_ (root) of the trace */ if (realm_to_rebuild && p >= e) - rebuild_snap_realms(realm_to_rebuild, &dirty_realms); + rebuild_snap_realms(mdsc, realm_to_rebuild, &dirty_realms); if (!first_realm) first_realm = realm; @@ -873,7 +888,7 @@ more: realm = list_first_entry(&dirty_realms, struct ceph_snap_realm, dirty_item); list_del_init(&realm->dirty_item); - queue_realm_cap_snaps(realm); + queue_realm_cap_snaps(mdsc, realm); } if (realm_ret) @@ -891,7 +906,7 @@ fail: ceph_put_snap_realm(mdsc, realm); if (first_realm) ceph_put_snap_realm(mdsc, first_realm); - pr_err("%s error %d\n", __func__, err); + pr_err_client(cl, "error %d\n", err); /* * When receiving a corrupted snap trace we don't know what @@ -905,11 +920,12 @@ fail: WRITE_ONCE(mdsc->fsc->mount_state, CEPH_MOUNT_FENCE_IO); ret = ceph_monc_blocklist_add(&client->monc, &client->msgr.inst.addr); if (ret) - pr_err("%s failed to blocklist %s: %d\n", __func__, - ceph_pr_addr(&client->msgr.inst.addr), ret); + pr_err_client(cl, "failed to blocklist %s: %d\n", + ceph_pr_addr(&client->msgr.inst.addr), ret); - WARN(1, "%s: %s%sdo remount to continue%s", - __func__, ret ? "" : ceph_pr_addr(&client->msgr.inst.addr), + WARN(1, "[client.%lld] %s %s%sdo remount to continue%s", + client->monc.auth->global_id, __func__, + ret ? "" : ceph_pr_addr(&client->msgr.inst.addr), ret ? "" : " was blocklisted, ", err == -EIO ? 
" after corrupted snaptrace is fixed" : ""); @@ -925,11 +941,12 @@ fail: */ static void flush_snaps(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci; struct inode *inode; struct ceph_mds_session *session = NULL; - dout("%s\n", __func__); + doutc(cl, "begin\n"); spin_lock(&mdsc->snap_flush_lock); while (!list_empty(&mdsc->snap_flush_list)) { ci = list_first_entry(&mdsc->snap_flush_list, @@ -944,7 +961,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc) spin_unlock(&mdsc->snap_flush_lock); ceph_put_mds_session(session); - dout("%s done\n", __func__); + doutc(cl, "done\n"); } /** @@ -960,7 +977,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc) void ceph_change_snap_realm(struct inode *inode, struct ceph_snap_realm *realm) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; struct ceph_snap_realm *oldrealm = ci->i_snap_realm; lockdep_assert_held(&ci->i_ceph_lock); @@ -1000,6 +1017,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, struct ceph_msg *msg) { + struct ceph_client *cl = mdsc->fsc->client; struct super_block *sb = mdsc->fsc->sb; int mds = session->s_mds; u64 split; @@ -1030,8 +1048,8 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, trace_len = le32_to_cpu(h->trace_len); p += sizeof(*h); - dout("%s from mds%d op %s split %llx tracelen %d\n", __func__, - mds, ceph_snap_op_name(op), split, trace_len); + doutc(cl, "from mds%d op %s split %llx tracelen %d\n", mds, + ceph_snap_op_name(op), split, trace_len); down_write(&mdsc->snap_rwsem); locked_rwsem = 1; @@ -1062,7 +1080,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, goto out; } - dout("splitting snap_realm %llx %p\n", realm->ino, realm); + doutc(cl, "splitting snap_realm %llx %p\n", realm->ino, realm); for (i = 0; i < num_split_inos; i++) { struct ceph_vino vino = { .ino = le64_to_cpu(split_inos[i]), @@ -1087,13 +1105,13 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, */ if (ci->i_snap_realm->created > le64_to_cpu(ri->created)) { - dout(" leaving %p %llx.%llx in newer realm %llx %p\n", - inode, ceph_vinop(inode), ci->i_snap_realm->ino, - ci->i_snap_realm); + doutc(cl, " leaving %p %llx.%llx in newer realm %llx %p\n", + inode, ceph_vinop(inode), ci->i_snap_realm->ino, + ci->i_snap_realm); goto skip_inode; } - dout(" will move %p %llx.%llx to split realm %llx %p\n", - inode, ceph_vinop(inode), realm->ino, realm); + doutc(cl, " will move %p %llx.%llx to split realm %llx %p\n", + inode, ceph_vinop(inode), realm->ino, realm); ceph_get_snap_realm(mdsc, realm); ceph_change_snap_realm(inode, realm); @@ -1154,7 +1172,7 @@ skip_inode: return; bad: - pr_err("%s corrupt snap message from mds%d\n", __func__, mds); + pr_err_client(cl, "corrupt snap message from mds%d\n", mds); ceph_msg_dump(msg); out: if (locked_rwsem) @@ -1170,6 +1188,7 @@ out: struct ceph_snapid_map* ceph_get_snapid_map(struct ceph_mds_client *mdsc, u64 snap) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_snapid_map *sm, *exist; struct rb_node **p, *parent; int ret; @@ -1192,8 +1211,8 @@ struct ceph_snapid_map* ceph_get_snapid_map(struct ceph_mds_client *mdsc, } spin_unlock(&mdsc->snapid_map_lock); if (exist) { - dout("%s found snapid map %llx -> %x\n", __func__, - exist->snap, exist->dev); + doutc(cl, "found snapid map %llx -> %x\n", exist->snap, + exist->dev); return exist; } @@ -1237,13 +1256,12 @@ 
struct ceph_snapid_map* ceph_get_snapid_map(struct ceph_mds_client *mdsc, if (exist) { free_anon_bdev(sm->dev); kfree(sm); - dout("%s found snapid map %llx -> %x\n", __func__, - exist->snap, exist->dev); + doutc(cl, "found snapid map %llx -> %x\n", exist->snap, + exist->dev); return exist; } - dout("%s create snapid map %llx -> %x\n", __func__, - sm->snap, sm->dev); + doutc(cl, "create snapid map %llx -> %x\n", sm->snap, sm->dev); return sm; } @@ -1268,6 +1286,7 @@ void ceph_put_snapid_map(struct ceph_mds_client* mdsc, void ceph_trim_snapid_map(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_snapid_map *sm; unsigned long now; LIST_HEAD(to_free); @@ -1289,7 +1308,7 @@ void ceph_trim_snapid_map(struct ceph_mds_client *mdsc) while (!list_empty(&to_free)) { sm = list_first_entry(&to_free, struct ceph_snapid_map, lru); list_del(&sm->lru); - dout("trim snapid map %llx -> %x\n", sm->snap, sm->dev); + doutc(cl, "trim snapid map %llx -> %x\n", sm->snap, sm->dev); free_anon_bdev(sm->dev); kfree(sm); } @@ -1297,6 +1316,7 @@ void ceph_trim_snapid_map(struct ceph_mds_client *mdsc) void ceph_cleanup_snapid_map(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_snapid_map *sm; struct rb_node *p; LIST_HEAD(to_free); @@ -1315,8 +1335,8 @@ void ceph_cleanup_snapid_map(struct ceph_mds_client *mdsc) list_del(&sm->lru); free_anon_bdev(sm->dev); if (WARN_ON_ONCE(atomic_read(&sm->ref))) { - pr_err("snapid map %llx -> %x still in use\n", - sm->snap, sm->dev); + pr_err_client(cl, "snapid map %llx -> %x still in use\n", + sm->snap, sm->dev); } kfree(sm); } diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 2d7f5a8d4a92..5ec102f6b1ac 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -44,28 +44,29 @@ static LIST_HEAD(ceph_fsc_list); */ static void ceph_put_super(struct super_block *s) { - struct ceph_fs_client *fsc = ceph_sb_to_client(s); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(s); - dout("put_super\n"); + doutc(fsc->client, "begin\n"); ceph_fscrypt_free_dummy_policy(fsc); ceph_mdsc_close_sessions(fsc->mdsc); + doutc(fsc->client, "done\n"); } static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) { - struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry)); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(d_inode(dentry)); struct ceph_mon_client *monc = &fsc->client->monc; struct ceph_statfs st; int i, err; u64 data_pool; + doutc(fsc->client, "begin\n"); if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) { data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0]; } else { data_pool = CEPH_NOPOOL; } - dout("statfs\n"); err = ceph_monc_do_statfs(monc, data_pool, &st); if (err < 0) return err; @@ -113,24 +114,26 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) /* fold the fs_cluster_id into the upper bits */ buf->f_fsid.val[1] = monc->fs_cluster_id; + doutc(fsc->client, "done\n"); return 0; } static int ceph_sync_fs(struct super_block *sb, int wait) { - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); + struct ceph_client *cl = fsc->client; if (!wait) { - dout("sync_fs (non-blocking)\n"); + doutc(cl, "(non-blocking)\n"); ceph_flush_dirty_caps(fsc->mdsc); - dout("sync_fs (non-blocking) done\n"); + doutc(cl, "(non-blocking) done\n"); return 0; } - dout("sync_fs (blocking)\n"); + doutc(cl, "(blocking)\n"); ceph_osdc_sync(&fsc->client->osdc); ceph_mdsc_sync(fsc->mdsc); - dout("sync_fs (blocking) done\n"); + doutc(cl, "(blocking) 
done\n"); return 0; } @@ -341,7 +344,7 @@ static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc) char *dev_name = param->string, *dev_name_end; int ret; - dout("%s '%s'\n", __func__, dev_name); + dout("'%s'\n", dev_name); if (!dev_name || !*dev_name) return invalfc(fc, "Empty source"); @@ -413,7 +416,7 @@ static int ceph_parse_mount_param(struct fs_context *fc, return ret; token = fs_parse(fc, ceph_mount_parameters, param, &result); - dout("%s fs_parse '%s' token %d\n", __func__, param->key, token); + dout("%s: fs_parse '%s' token %d\n",__func__, param->key, token); if (token < 0) return token; @@ -684,7 +687,7 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt, */ static int ceph_show_options(struct seq_file *m, struct dentry *root) { - struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(root->d_sb); struct ceph_mount_options *fsopt = fsc->mount_options; size_t pos; int ret; @@ -881,7 +884,7 @@ static void flush_fs_workqueues(struct ceph_fs_client *fsc) static void destroy_fs_client(struct ceph_fs_client *fsc) { - dout("destroy_fs_client %p\n", fsc); + doutc(fsc->client, "%p\n", fsc); spin_lock(&ceph_fsc_lock); list_del(&fsc->metric_wakeup); @@ -896,7 +899,7 @@ static void destroy_fs_client(struct ceph_fs_client *fsc) ceph_destroy_client(fsc->client); kfree(fsc); - dout("destroy_fs_client %p done\n", fsc); + dout("%s: %p done\n", __func__, fsc); } /* @@ -1015,9 +1018,9 @@ static void __ceph_umount_begin(struct ceph_fs_client *fsc) */ void ceph_umount_begin(struct super_block *sb) { - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); - dout("ceph_umount_begin - starting forced umount\n"); + doutc(fsc->client, "starting forced umount\n"); if (!fsc) return; fsc->mount_state = CEPH_MOUNT_SHUTDOWN; @@ -1045,13 +1048,14 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, const char *path, unsigned long started) { + struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req = NULL; int err; struct dentry *root; /* open dir */ - dout("open_root_inode opening '%s'\n", path); + doutc(cl, "opening '%s'\n", path); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); if (IS_ERR(req)) return ERR_CAST(req); @@ -1071,13 +1075,13 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, if (err == 0) { struct inode *inode = req->r_target_inode; req->r_target_inode = NULL; - dout("open_root_inode success\n"); + doutc(cl, "success\n"); root = d_make_root(inode); if (!root) { root = ERR_PTR(-ENOMEM); goto out; } - dout("open_root_inode success, root dentry is %p\n", root); + doutc(cl, "success, root dentry is %p\n", root); } else { root = ERR_PTR(err); } @@ -1136,11 +1140,12 @@ static int ceph_apply_test_dummy_encryption(struct super_block *sb, static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc, struct fs_context *fc) { + struct ceph_client *cl = fsc->client; int err; unsigned long started = jiffies; /* note the start time */ struct dentry *root; - dout("mount start %p\n", fsc); + doutc(cl, "mount start %p\n", fsc); mutex_lock(&fsc->client->mount_mutex); if (!fsc->sb->s_root) { @@ -1163,7 +1168,7 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc, if (err) goto out; - dout("mount opening path '%s'\n", path); + doutc(cl, "mount opening path '%s'\n", path); ceph_fs_debugfs_init(fsc); @@ -1178,7 +1183,7 @@ static struct 
dentry *ceph_real_mount(struct ceph_fs_client *fsc, } fsc->mount_state = CEPH_MOUNT_MOUNTED; - dout("mount success\n"); + doutc(cl, "mount success\n"); mutex_unlock(&fsc->client->mount_mutex); return root; @@ -1191,9 +1196,10 @@ out: static int ceph_set_super(struct super_block *s, struct fs_context *fc) { struct ceph_fs_client *fsc = s->s_fs_info; + struct ceph_client *cl = fsc->client; int ret; - dout("set_super %p\n", s); + doutc(cl, "%p\n", s); s->s_maxbytes = MAX_LFS_FILESIZE; @@ -1226,31 +1232,32 @@ static int ceph_compare_super(struct super_block *sb, struct fs_context *fc) struct ceph_fs_client *new = fc->s_fs_info; struct ceph_mount_options *fsopt = new->mount_options; struct ceph_options *opt = new->client->options; - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); + struct ceph_client *cl = fsc->client; - dout("ceph_compare_super %p\n", sb); + doutc(cl, "%p\n", sb); if (compare_mount_options(fsopt, opt, fsc)) { - dout("monitor(s)/mount options don't match\n"); + doutc(cl, "monitor(s)/mount options don't match\n"); return 0; } if ((opt->flags & CEPH_OPT_FSID) && ceph_fsid_compare(&opt->fsid, &fsc->client->fsid)) { - dout("fsid doesn't match\n"); + doutc(cl, "fsid doesn't match\n"); return 0; } if (fc->sb_flags != (sb->s_flags & ~SB_BORN)) { - dout("flags differ\n"); + doutc(cl, "flags differ\n"); return 0; } if (fsc->blocklisted && !ceph_test_mount_opt(fsc, CLEANRECOVER)) { - dout("client is blocklisted (and CLEANRECOVER is not set)\n"); + doutc(cl, "client is blocklisted (and CLEANRECOVER is not set)\n"); return 0; } if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { - dout("client has been forcibly unmounted\n"); + doutc(cl, "client has been forcibly unmounted\n"); return 0; } @@ -1322,9 +1329,9 @@ static int ceph_get_tree(struct fs_context *fc) goto out; } - if (ceph_sb_to_client(sb) != fsc) { + if (ceph_sb_to_fs_client(sb) != fsc) { destroy_fs_client(fsc); - fsc = ceph_sb_to_client(sb); + fsc = ceph_sb_to_fs_client(sb); dout("get_sb got existing client %p\n", fsc); } else { dout("get_sb using new client %p\n", fsc); @@ -1338,8 +1345,9 @@ static int ceph_get_tree(struct fs_context *fc) err = PTR_ERR(res); goto out_splat; } - dout("root %p inode %p ino %llx.%llx\n", res, - d_inode(res), ceph_vinop(d_inode(res))); + + doutc(fsc->client, "root %p inode %p ino %llx.%llx\n", res, + d_inode(res), ceph_vinop(d_inode(res))); fc->root = fsc->sb->s_root; return 0; @@ -1377,7 +1385,7 @@ static int ceph_reconfigure_fc(struct fs_context *fc) struct ceph_parse_opts_ctx *pctx = fc->fs_private; struct ceph_mount_options *fsopt = pctx->opts; struct super_block *sb = fc->root->d_sb; - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); err = ceph_apply_test_dummy_encryption(sb, fc, fsopt); if (err) @@ -1397,7 +1405,8 @@ static int ceph_reconfigure_fc(struct fs_context *fc) kfree(fsc->mount_options->mon_addr); fsc->mount_options->mon_addr = fsopt->mon_addr; fsopt->mon_addr = NULL; - pr_notice("ceph: monitor addresses recorded, but not used for reconnection"); + pr_notice_client(fsc->client, + "monitor addresses recorded, but not used for reconnection"); } sync_filesystem(sb); @@ -1516,11 +1525,12 @@ void ceph_dec_osd_stopping_blocker(struct ceph_mds_client *mdsc) static void ceph_kill_sb(struct super_block *s) { - struct ceph_fs_client *fsc = ceph_sb_to_client(s); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(s); + struct ceph_client *cl = fsc->client; struct ceph_mds_client 
*mdsc = fsc->mdsc; bool wait; - dout("kill_sb %p\n", s); + doutc(cl, "%p\n", s); ceph_mdsc_pre_umount(mdsc); flush_fs_workqueues(fsc); @@ -1551,9 +1561,9 @@ static void ceph_kill_sb(struct super_block *s) &mdsc->stopping_waiter, fsc->client->options->mount_timeout); if (!timeleft) /* timed out */ - pr_warn("umount timed out, %ld\n", timeleft); + pr_warn_client(cl, "umount timed out, %ld\n", timeleft); else if (timeleft < 0) /* killed */ - pr_warn("umount was killed, %ld\n", timeleft); + pr_warn_client(cl, "umount was killed, %ld\n", timeleft); } mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED; @@ -1572,13 +1582,13 @@ static struct file_system_type ceph_fs_type = { .name = "ceph", .init_fs_context = ceph_init_fs_context, .kill_sb = ceph_kill_sb, - .fs_flags = FS_RENAME_DOES_D_MOVE, + .fs_flags = FS_RENAME_DOES_D_MOVE | FS_ALLOW_IDMAP, }; MODULE_ALIAS_FS("ceph"); int ceph_force_reconnect(struct super_block *sb) { - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); int err = 0; fsc->mount_state = CEPH_MOUNT_RECOVER; @@ -1671,6 +1681,11 @@ static const struct kernel_param_ops param_ops_mount_syntax = { module_param_cb(mount_syntax_v1, &param_ops_mount_syntax, &mount_support, 0444); module_param_cb(mount_syntax_v2, &param_ops_mount_syntax, &mount_support, 0444); +bool enable_unsafe_idmap = false; +module_param(enable_unsafe_idmap, bool, 0644); +MODULE_PARM_DESC(enable_unsafe_idmap, + "Allow to use idmapped mounts with MDS without CEPHFS_FEATURE_HAS_OWNER_UIDGID"); + module_init(init_ceph); module_exit(exit_ceph); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 98844fc8a2f7..fe0f64a0acb2 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -488,13 +488,13 @@ ceph_inode(const struct inode *inode) } static inline struct ceph_fs_client * -ceph_inode_to_client(const struct inode *inode) +ceph_inode_to_fs_client(const struct inode *inode) { return (struct ceph_fs_client *)inode->i_sb->s_fs_info; } static inline struct ceph_fs_client * -ceph_sb_to_client(const struct super_block *sb) +ceph_sb_to_fs_client(const struct super_block *sb) { return (struct ceph_fs_client *)sb->s_fs_info; } @@ -502,7 +502,13 @@ ceph_sb_to_client(const struct super_block *sb) static inline struct ceph_mds_client * ceph_sb_to_mdsc(const struct super_block *sb) { - return (struct ceph_mds_client *)ceph_sb_to_client(sb)->mdsc; + return (struct ceph_mds_client *)ceph_sb_to_fs_client(sb)->mdsc; +} + +static inline struct ceph_client * +ceph_inode_to_client(const struct inode *inode) +{ + return (struct ceph_client *)ceph_inode_to_fs_client(inode)->client; } static inline struct ceph_vino @@ -558,7 +564,7 @@ static inline u64 ceph_snap(struct inode *inode) */ static inline u64 ceph_present_ino(struct super_block *sb, u64 ino) { - if (unlikely(ceph_test_mount_opt(ceph_sb_to_client(sb), INO32))) + if (unlikely(ceph_test_mount_opt(ceph_sb_to_fs_client(sb), INO32))) return ceph_ino_to_ino32(ino); return ino; } @@ -1094,8 +1100,8 @@ struct ceph_iattr { struct ceph_fscrypt_auth *fscrypt_auth; }; -extern int __ceph_setattr(struct inode *inode, struct iattr *attr, - struct ceph_iattr *cia); +extern int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode, + struct iattr *attr, struct ceph_iattr *cia); extern int ceph_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); extern int ceph_getattr(struct mnt_idmap *idmap, @@ -1106,7 +1112,7 @@ void ceph_inode_shutdown(struct inode *inode); static inline bool ceph_inode_is_shutdown(struct inode *inode) {
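/*
 * Example (a sketch, not part of the patch): with FS_ALLOW_IDMAP set
 * above, an already-mounted cephfs subtree can be attached as an
 * idmapped mount from userspace via open_tree()/mount_setattr()/
 * move_mount(). userns_fd is assumed to be a user namespace fd carrying
 * the desired ID mapping; the SYS_* numbers need a reasonably recent
 * glibc, and error handling is abbreviated.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/mount.h>
#include <sys/syscall.h>
#include <unistd.h>

static int attach_idmapped(const char *src, const char *dst, int userns_fd)
{
	struct mount_attr attr = {
		.attr_set = MOUNT_ATTR_IDMAP,
		.userns_fd = userns_fd,
	};
	int fd = syscall(SYS_open_tree, AT_FDCWD, src,
			 OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);

	if (fd < 0)
		return -1;
	if (syscall(SYS_mount_setattr, fd, "", AT_EMPTY_PATH,
		    &attr, sizeof(attr)) < 0 ||
	    syscall(SYS_move_mount, fd, "", AT_FDCWD, dst,
		    MOVE_MOUNT_F_EMPTY_PATH) < 0) {
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}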
unsigned long flags = READ_ONCE(ceph_inode(inode)->i_ceph_flags); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); int state = READ_ONCE(fsc->mount_state); return (flags & CEPH_I_SHUTDOWN) || state >= CEPH_MOUNT_SHUTDOWN; @@ -1223,7 +1229,8 @@ extern void ceph_add_cap(struct inode *inode, unsigned cap, unsigned seq, u64 realmino, int flags, struct ceph_cap **new_cap); extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); -extern void ceph_remove_cap(struct ceph_cap *cap, bool queue_release); +extern void ceph_remove_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, + bool queue_release); extern void __ceph_remove_caps(struct ceph_inode_info *ci); extern void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap); diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 097ce7f74073..e066a556eccb 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -57,7 +57,8 @@ static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci) static ssize_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, size_t size) { - struct ceph_fs_client *fsc = ceph_sb_to_client(ci->netfs.inode.i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(ci->netfs.inode.i_sb); + struct ceph_client *cl = fsc->client; struct ceph_osd_client *osdc = &fsc->client->osdc; struct ceph_string *pool_ns; s64 pool = ci->i_layout.pool_id; @@ -69,7 +70,7 @@ static ssize_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, pool_ns = ceph_try_get_string(ci->i_layout.pool_ns); - dout("ceph_vxattrcb_layout %p\n", &ci->netfs.inode); + doutc(cl, "%p\n", &ci->netfs.inode); down_read(&osdc->lock); pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); if (pool_name) { @@ -161,7 +162,7 @@ static ssize_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci, char *val, size_t size) { ssize_t ret; - struct ceph_fs_client *fsc = ceph_sb_to_client(ci->netfs.inode.i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(ci->netfs.inode.i_sb); struct ceph_osd_client *osdc = &fsc->client->osdc; s64 pool = ci->i_layout.pool_id; const char *pool_name; @@ -313,7 +314,7 @@ static ssize_t ceph_vxattrcb_snap_btime(struct ceph_inode_info *ci, char *val, static ssize_t ceph_vxattrcb_cluster_fsid(struct ceph_inode_info *ci, char *val, size_t size) { - struct ceph_fs_client *fsc = ceph_sb_to_client(ci->netfs.inode.i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(ci->netfs.inode.i_sb); return ceph_fmt_xattr(val, size, "%pU", &fsc->client->fsid); } @@ -321,7 +322,7 @@ static ssize_t ceph_vxattrcb_cluster_fsid(struct ceph_inode_info *ci, static ssize_t ceph_vxattrcb_client_id(struct ceph_inode_info *ci, char *val, size_t size) { - struct ceph_fs_client *fsc = ceph_sb_to_client(ci->netfs.inode.i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(ci->netfs.inode.i_sb); return ceph_fmt_xattr(val, size, "client%lld", ceph_client_gid(fsc->client)); @@ -570,6 +571,8 @@ static int __set_xattr(struct ceph_inode_info *ci, int flags, int update_xattr, struct ceph_inode_xattr **newxattr) { + struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); struct rb_node **p; struct rb_node *parent = NULL; struct ceph_inode_xattr *xattr = NULL; @@ -626,7 +629,7 @@ static int __set_xattr(struct ceph_inode_info *ci, xattr->should_free_name = update_xattr; ci->i_xattrs.count++; - dout("%s count=%d\n", __func__, ci->i_xattrs.count); + doutc(cl, "count=%d\n", ci->i_xattrs.count); } else { 
kfree(*newxattr); *newxattr = NULL; @@ -654,13 +657,13 @@ static int __set_xattr(struct ceph_inode_info *ci, if (new) { rb_link_node(&xattr->node, parent, p); rb_insert_color(&xattr->node, &ci->i_xattrs.index); - dout("%s p=%p\n", __func__, p); + doutc(cl, "p=%p\n", p); } - dout("%s added %llx.%llx xattr %p %.*s=%.*s%s\n", __func__, - ceph_vinop(&ci->netfs.inode), xattr, name_len, name, - min(val_len, MAX_XATTR_VAL_PRINT_LEN), val, - val_len > MAX_XATTR_VAL_PRINT_LEN ? "..." : ""); + doutc(cl, "added %p %llx.%llx xattr %p %.*s=%.*s%s\n", inode, + ceph_vinop(inode), xattr, name_len, name, min(val_len, + MAX_XATTR_VAL_PRINT_LEN), val, + val_len > MAX_XATTR_VAL_PRINT_LEN ? "..." : ""); return 0; } @@ -668,6 +671,7 @@ static int __set_xattr(struct ceph_inode_info *ci, static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci, const char *name) { + struct ceph_client *cl = ceph_inode_to_client(&ci->netfs.inode); struct rb_node **p; struct rb_node *parent = NULL; struct ceph_inode_xattr *xattr = NULL; @@ -688,13 +692,13 @@ static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci, else { int len = min(xattr->val_len, MAX_XATTR_VAL_PRINT_LEN); - dout("%s %s: found %.*s%s\n", __func__, name, len, - xattr->val, xattr->val_len > len ? "..." : ""); + doutc(cl, "%s found %.*s%s\n", name, len, xattr->val, + xattr->val_len > len ? "..." : ""); return xattr; } } - dout("%s %s: not found\n", __func__, name); + doutc(cl, "%s not found\n", name); return NULL; } @@ -735,19 +739,20 @@ static int __remove_xattr(struct ceph_inode_info *ci, static char *__copy_xattr_names(struct ceph_inode_info *ci, char *dest) { + struct ceph_client *cl = ceph_inode_to_client(&ci->netfs.inode); struct rb_node *p; struct ceph_inode_xattr *xattr = NULL; p = rb_first(&ci->i_xattrs.index); - dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count); + doutc(cl, "count=%d\n", ci->i_xattrs.count); while (p) { xattr = rb_entry(p, struct ceph_inode_xattr, node); memcpy(dest, xattr->name, xattr->name_len); dest[xattr->name_len] = '\0'; - dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name, - xattr->name_len, ci->i_xattrs.names_size); + doutc(cl, "dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name, + xattr->name_len, ci->i_xattrs.names_size); dest += xattr->name_len + 1; p = rb_next(p); @@ -758,19 +763,19 @@ static char *__copy_xattr_names(struct ceph_inode_info *ci, void __ceph_destroy_xattrs(struct ceph_inode_info *ci) { + struct ceph_client *cl = ceph_inode_to_client(&ci->netfs.inode); struct rb_node *p, *tmp; struct ceph_inode_xattr *xattr = NULL; p = rb_first(&ci->i_xattrs.index); - dout("__ceph_destroy_xattrs p=%p\n", p); + doutc(cl, "p=%p\n", p); while (p) { xattr = rb_entry(p, struct ceph_inode_xattr, node); tmp = p; p = rb_next(tmp); - dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p, - xattr->name_len, xattr->name); + doutc(cl, "next p=%p (%.*s)\n", p, xattr->name_len, xattr->name); rb_erase(tmp, &ci->i_xattrs.index); __free_xattr(xattr); @@ -787,6 +792,7 @@ static int __build_xattrs(struct inode *inode) __releases(ci->i_ceph_lock) __acquires(ci->i_ceph_lock) { + struct ceph_client *cl = ceph_inode_to_client(inode); u32 namelen; u32 numattr = 0; void *p, *end; @@ -798,8 +804,8 @@ static int __build_xattrs(struct inode *inode) int err = 0; int i; - dout("__build_xattrs() len=%d\n", - ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0); + doutc(cl, "len=%d\n", + ci->i_xattrs.blob ? 
(int)ci->i_xattrs.blob->vec.iov_len : 0); if (ci->i_xattrs.index_version >= ci->i_xattrs.version) return 0; /* already built */ @@ -874,6 +880,8 @@ bad: static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size, int val_size) { + struct ceph_client *cl = ceph_inode_to_client(&ci->netfs.inode); + /* * 4 bytes for the length, and additional 4 bytes per each xattr name, * 4 bytes per each value @@ -881,9 +889,8 @@ static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size, int size = 4 + ci->i_xattrs.count*(4 + 4) + ci->i_xattrs.names_size + ci->i_xattrs.vals_size; - dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n", - ci->i_xattrs.count, ci->i_xattrs.names_size, - ci->i_xattrs.vals_size); + doutc(cl, "c=%d names.size=%d vals.size=%d\n", ci->i_xattrs.count, + ci->i_xattrs.names_size, ci->i_xattrs.vals_size); if (name_size) size += 4 + 4 + name_size + val_size; @@ -899,12 +906,14 @@ static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size, */ struct ceph_buffer *__ceph_build_xattrs_blob(struct ceph_inode_info *ci) { + struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); struct rb_node *p; struct ceph_inode_xattr *xattr = NULL; struct ceph_buffer *old_blob = NULL; void *dest; - dout("__build_xattrs_blob %p\n", &ci->netfs.inode); + doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode)); if (ci->i_xattrs.dirty) { int need = __get_required_blob_size(ci, 0, 0); @@ -962,6 +971,7 @@ static inline int __get_request_mask(struct inode *in) { ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, size_t size) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_xattr *xattr; struct ceph_vxattr *vxattr; @@ -1000,8 +1010,9 @@ handle_non_vxattrs: req_mask = __get_request_mask(inode); spin_lock(&ci->i_ceph_lock); - dout("getxattr %p name '%s' ver=%lld index_ver=%lld\n", inode, name, - ci->i_xattrs.version, ci->i_xattrs.index_version); + doutc(cl, "%p %llx.%llx name '%s' ver=%lld index_ver=%lld\n", inode, + ceph_vinop(inode), name, ci->i_xattrs.version, + ci->i_xattrs.index_version); if (ci->i_xattrs.version == 0 || !((req_mask & CEPH_CAP_XATTR_SHARED) || @@ -1010,8 +1021,9 @@ handle_non_vxattrs: /* security module gets xattr while filling trace */ if (current->journal_info) { - pr_warn_ratelimited("sync getxattr %p " - "during filling trace\n", inode); + pr_warn_ratelimited_client(cl, + "sync %p %llx.%llx during filling trace\n", + inode, ceph_vinop(inode)); return -EBUSY; } @@ -1053,14 +1065,16 @@ out: ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) { struct inode *inode = d_inode(dentry); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); bool len_only = (size == 0); u32 namelen; int err; spin_lock(&ci->i_ceph_lock); - dout("listxattr %p ver=%lld index_ver=%lld\n", inode, - ci->i_xattrs.version, ci->i_xattrs.index_version); + doutc(cl, "%p %llx.%llx ver=%lld index_ver=%lld\n", inode, + ceph_vinop(inode), ci->i_xattrs.version, + ci->i_xattrs.index_version); if (ci->i_xattrs.version == 0 || !__ceph_caps_issued_mask_metric(ci, CEPH_CAP_XATTR_SHARED, 1)) { @@ -1094,7 +1108,8 @@ out: static int ceph_sync_setxattr(struct inode *inode, const char *name, const char *value, size_t size, int flags) { - struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(inode->i_sb); + 
struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req; struct ceph_mds_client *mdsc = fsc->mdsc; @@ -1119,7 +1134,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name, flags |= CEPH_XATTR_REMOVE; } - dout("setxattr value size: %zu\n", size); + doutc(cl, "name %s value size %zu\n", name, size); /* do request */ req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); @@ -1148,10 +1163,10 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name, req->r_num_caps = 1; req->r_inode_drop = CEPH_CAP_XATTR_SHARED; - dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); + doutc(cl, "xattr.ver (before): %lld\n", ci->i_xattrs.version); err = ceph_mdsc_do_request(mdsc, NULL, req); ceph_mdsc_put_request(req); - dout("xattr.ver (after): %lld\n", ci->i_xattrs.version); + doutc(cl, "xattr.ver (after): %lld\n", ci->i_xattrs.version); out: if (pagelist) @@ -1162,9 +1177,10 @@ out: int __ceph_setxattr(struct inode *inode, const char *name, const void *value, size_t size, int flags) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_vxattr *vxattr; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; struct ceph_cap_flush *prealloc_cf = NULL; struct ceph_buffer *old_blob = NULL; int issued; @@ -1220,9 +1236,9 @@ retry: required_blob_size = __get_required_blob_size(ci, name_len, val_len); if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) || (required_blob_size > mdsc->mdsmap->m_max_xattr_size)) { - dout("%s do sync setxattr: version: %llu size: %d max: %llu\n", - __func__, ci->i_xattrs.version, required_blob_size, - mdsc->mdsmap->m_max_xattr_size); + doutc(cl, "sync version: %llu size: %d max: %llu\n", + ci->i_xattrs.version, required_blob_size, + mdsc->mdsmap->m_max_xattr_size); goto do_sync; } @@ -1236,8 +1252,8 @@ retry: } } - dout("setxattr %p name '%s' issued %s\n", inode, name, - ceph_cap_string(issued)); + doutc(cl, "%p %llx.%llx name '%s' issued %s\n", inode, + ceph_vinop(inode), name, ceph_cap_string(issued)); __build_xattrs(inode); if (!ci->i_xattrs.prealloc_blob || @@ -1246,7 +1262,8 @@ retry: spin_unlock(&ci->i_ceph_lock); ceph_buffer_put(old_blob); /* Shouldn't be required */ - dout(" pre-allocating new blob size=%d\n", required_blob_size); + doutc(cl, " pre-allocating new blob size=%d\n", + required_blob_size); blob = ceph_buffer_new(required_blob_size, GFP_NOFS); if (!blob) goto do_sync_unlocked; @@ -1285,8 +1302,9 @@ do_sync_unlocked: /* security module set xattr while filling trace */ if (current->journal_info) { - pr_warn_ratelimited("sync setxattr %p " - "during filling trace\n", inode); + pr_warn_ratelimited_client(cl, + "sync %p %llx.%llx during filling trace\n", + inode, ceph_vinop(inode)); err = -EBUSY; } else { err = ceph_sync_setxattr(inode, name, value, size, flags); diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index b784c393a504..77240953a92e 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -46,6 +46,7 @@ static int efivarfs_statfs(struct dentry *dentry, struct kstatfs *buf) EFI_VARIABLE_BOOTSERVICE_ACCESS | EFI_VARIABLE_RUNTIME_ACCESS; u64 storage_space, remaining_space, max_variable_size; + u64 id = huge_encode_dev(dentry->d_sb->s_dev); efi_status_t status; /* Some UEFI firmware does not implement QueryVariableInfo() */ @@ -69,6 +70,7 @@ static int efivarfs_statfs(struct 
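/*
 * Example (a sketch, not part of the patch): the efivarfs hunk that
 * continues below and the freevxfs hunk further down fill in statfs
 * f_fsid the same way, deriving a stable identifier from the device
 * number (efivarfs uses s_dev, freevxfs the backing bdev):
 */
#include <linux/fs.h>
#include <linux/kdev_t.h>
#include <linux/statfs.h>

static void example_fill_fsid(struct super_block *sb, struct kstatfs *buf)
{
	u64 id = huge_encode_dev(sb->s_dev);

	buf->f_fsid = u64_to_fsid(id);
}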
dentry *dentry, struct kstatfs *buf) buf->f_blocks = storage_space; buf->f_bfree = remaining_space; buf->f_type = dentry->d_sb->s_magic; + buf->f_fsid = u64_to_fsid(id); /* * In f_bavail we declare the free space that the kernel will allow writing diff --git a/fs/efs/super.c b/fs/efs/super.c index b287f47c165b..f17fdac76b2e 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -123,6 +123,7 @@ static const struct super_operations efs_superblock_operations = { }; static const struct export_operations efs_export_ops = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = efs_fh_to_dentry, .fh_to_parent = efs_fh_to_parent, .get_parent = efs_get_parent, diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 976dc39a88f7..3789d6224513 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -567,6 +567,7 @@ static struct dentry *erofs_get_parent(struct dentry *child) } static const struct export_operations erofs_export_ops = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = erofs_fh_to_dentry, .fh_to_parent = erofs_fh_to_parent, .get_parent = erofs_get_parent, diff --git a/fs/exfat/file.c b/fs/exfat/file.c index 02c4e2937879..bfdfafe00993 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -295,6 +295,7 @@ int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (attr->ia_valid & ATTR_SIZE) inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); + setattr_copy(&nop_mnt_idmap, inode, attr); exfat_truncate_inode_atime(inode); if (attr->ia_valid & ATTR_SIZE) { diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index 875234179d1f..e7ff58b8e68c 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -56,18 +56,18 @@ int __exfat_write_inode(struct inode *inode, int sync) &ep->dentry.file.create_time, &ep->dentry.file.create_date, &ep->dentry.file.create_time_cs); + ts = inode_get_mtime(inode); exfat_set_entry_time(sbi, &ts, &ep->dentry.file.modify_tz, &ep->dentry.file.modify_time, &ep->dentry.file.modify_date, &ep->dentry.file.modify_time_cs); - inode_set_mtime_to_ts(inode, ts); + ts = inode_get_atime(inode); exfat_set_entry_time(sbi, &ts, &ep->dentry.file.access_tz, &ep->dentry.file.access_time, &ep->dentry.file.access_date, NULL); - inode_set_atime_to_ts(inode, ts); /* File size should be zero if there is no cluster allocated */ on_disk_size = i_size_read(inode); diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index c20704aa21b3..3ae0154c5680 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -342,43 +342,30 @@ out: return error; } +#define FILEID_INO64_GEN_LEN 3 + /** - * export_encode_fh - default export_operations->encode_fh function + * exportfs_encode_ino64_fid - encode non-decodeable 64bit ino file id * @inode: the object to encode * @fid: where to store the file handle fragment - * @max_len: maximum length to store there - * @parent: parent directory inode, if wanted + * @max_len: maximum length to store there (in 4 byte units) * - * This default encode_fh function assumes that the 32 inode number - * is suitable for locating an inode, and that the generation number - * can be used to check that it is still valid. It places them in the - * filehandle fragment where export_decode_fh expects to find them. + * This generic function is used to encode a non-decodeable file id for + * fanotify for filesystems that do not support NFS export. 
*/ -static int export_encode_fh(struct inode *inode, struct fid *fid, - int *max_len, struct inode *parent) +static int exportfs_encode_ino64_fid(struct inode *inode, struct fid *fid, + int *max_len) { - int len = *max_len; - int type = FILEID_INO32_GEN; - - if (parent && (len < 4)) { - *max_len = 4; - return FILEID_INVALID; - } else if (len < 2) { - *max_len = 2; + if (*max_len < FILEID_INO64_GEN_LEN) { + *max_len = FILEID_INO64_GEN_LEN; return FILEID_INVALID; } - len = 2; - fid->i32.ino = inode->i_ino; - fid->i32.gen = inode->i_generation; - if (parent) { - fid->i32.parent_ino = parent->i_ino; - fid->i32.parent_gen = parent->i_generation; - len = 4; - type = FILEID_INO32_GEN_PARENT; - } - *max_len = len; - return type; + fid->i64.ino = inode->i_ino; + fid->i64.gen = inode->i_generation; + *max_len = FILEID_INO64_GEN_LEN; + + return FILEID_INO64_GEN; } /** @@ -396,17 +383,13 @@ int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, { const struct export_operations *nop = inode->i_sb->s_export_op; - /* - * If a decodeable file handle was requested, we need to make sure that - * filesystem can decode file handles. - */ - if (nop && !(flags & EXPORT_FH_FID) && !nop->fh_to_dentry) + if (!exportfs_can_encode_fh(nop, flags)) return -EOPNOTSUPP; - if (nop && nop->encode_fh) - return nop->encode_fh(inode, fid->raw, max_len, parent); + if (!nop && (flags & EXPORT_FH_FID)) + return exportfs_encode_ino64_fid(inode, fid, max_len); - return export_encode_fh(inode, fid, max_len, parent); + return nop->encode_fh(inode, fid->raw, max_len, parent); } EXPORT_SYMBOL_GPL(exportfs_encode_inode_fh); @@ -456,7 +439,7 @@ exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, /* * Try to get any dentry for the given file handle from the filesystem. 
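/*
 * Example (a sketch, not part of the patch): the non-decodeable file ids
 * produced by exportfs_encode_ino64_fid() are what userspace receives
 * from name_to_handle_at() when it passes AT_HANDLE_FID (the uapi
 * counterpart of EXPORT_FH_FID), e.g. to correlate fanotify FID events.
 * AT_HANDLE_FID may need the fallback define with older headers.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>

#ifndef AT_HANDLE_FID
#define AT_HANDLE_FID 0x200	/* file handle is an object id only */
#endif

int main(int argc, char **argv)
{
	struct file_handle *fh;
	int mount_id;

	if (argc < 2)
		return 1;
	fh = malloc(sizeof(*fh) + MAX_HANDLE_SZ);
	if (!fh)
		return 1;
	fh->handle_bytes = MAX_HANDLE_SZ;
	if (name_to_handle_at(AT_FDCWD, argv[1], fh, &mount_id,
			      AT_HANDLE_FID) < 0) {
		perror("name_to_handle_at");
		return 1;
	}
	printf("fid type %d, %u bytes\n", fh->handle_type, fh->handle_bytes);
	return 0;
}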
*/ - if (!nop || !nop->fh_to_dentry) + if (!exportfs_can_decode_fh(nop)) return ERR_PTR(-ESTALE); result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); if (IS_ERR_OR_NULL(result)) diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 645ee6142f69..01f9addc8b1f 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -397,6 +397,7 @@ static struct dentry *ext2_fh_to_parent(struct super_block *sb, struct fid *fid, } static const struct export_operations ext2_export_ops = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = ext2_fh_to_dentry, .fh_to_parent = ext2_fh_to_parent, .get_parent = ext2_get_parent, diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 54a9dde7483a..c5fcf377ab1f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1654,6 +1654,7 @@ static const struct super_operations ext4_sops = { }; static const struct export_operations ext4_export_ops = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = ext4_fh_to_dentry, .fh_to_parent = ext4_fh_to_parent, .get_parent = ext4_get_parent, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1ed68158bac5..033af907c3b1 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3330,6 +3330,7 @@ static struct dentry *f2fs_fh_to_parent(struct super_block *sb, struct fid *fid, } static const struct export_operations f2fs_export_ops = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = f2fs_fh_to_dentry, .fh_to_parent = f2fs_fh_to_parent, .get_parent = f2fs_get_parent, diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c index 3626eb585a98..c52e63e10d35 100644 --- a/fs/fat/nfs.c +++ b/fs/fat/nfs.c @@ -279,6 +279,7 @@ static struct dentry *fat_get_parent(struct dentry *child_dir) } const struct export_operations fat_export_ops = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = fat_fh_to_dentry, .fh_to_parent = fat_fh_to_parent, .get_parent = fat_get_parent, diff --git a/fs/fhandle.c b/fs/fhandle.c index 6ea8d35a9382..18b3ba8dc8ea 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -26,12 +26,8 @@ static long do_sys_name_to_handle(const struct path *path, /* * We need to make sure whether the file system support decoding of * the file handle if decodeable file handle was requested. - * Otherwise, even empty export_operations are sufficient to opt-in - * to encoding FIDs. */ - if (!path->dentry->d_sb->s_export_op || - (!(fh_flags & EXPORT_FH_FID) && - !path->dentry->d_sb->s_export_op->fh_to_dentry)) + if (!exportfs_can_encode_fh(path->dentry->d_sb->s_export_op, fh_flags)) return -EOPNOTSUPP; if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 310d73e254df..e6e2a2185e7c 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -76,6 +76,7 @@ vxfs_statfs(struct dentry *dentry, struct kstatfs *bufp) { struct vxfs_sb_info *infp = VXFS_SBI(dentry->d_sb); struct vxfs_sb *raw_sb = infp->vsi_raw; + u64 id = huge_encode_dev(dentry->d_sb->s_bdev->bd_dev); bufp->f_type = VXFS_SUPER_MAGIC; bufp->f_bsize = dentry->d_sb->s_blocksize; @@ -84,6 +85,7 @@ vxfs_statfs(struct dentry *dentry, struct kstatfs *bufp) bufp->f_bavail = 0; bufp->f_files = 0; bufp->f_ffree = fs32_to_cpu(infp, raw_sb->vs_ifree); + bufp->f_fsid = u64_to_fsid(id); bufp->f_namelen = VXFS_NAMELEN; return 0; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index caa8121ad99c..74d4f09d5827 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -999,7 +999,7 @@ static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len, } *max_len = len; - return parent ? 
0x82 : 0x81; + return parent ? FILEID_INO64_GEN_PARENT : FILEID_INO64_GEN; } static struct dentry *fuse_fh_to_dentry(struct super_block *sb, @@ -1007,7 +1007,8 @@ static struct dentry *fuse_fh_to_dentry(struct super_block *sb, { struct fuse_inode_handle handle; - if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3) + if ((fh_type != FILEID_INO64_GEN && + fh_type != FILEID_INO64_GEN_PARENT) || fh_len < 3) return NULL; handle.nodeid = (u64) fid->raw[0] << 32; @@ -1021,7 +1022,7 @@ static struct dentry *fuse_fh_to_parent(struct super_block *sb, { struct fuse_inode_handle parent; - if (fh_type != 0x82 || fh_len < 6) + if (fh_type != FILEID_INO64_GEN_PARENT || fh_len < 6) return NULL; parent.nodeid = (u64) fid->raw[3] << 32; diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h index d4deb2b19959..82f5b09c04e6 100644 --- a/fs/gfs2/acl.h +++ b/fs/gfs2/acl.h @@ -11,9 +11,9 @@ #define GFS2_ACL_MAX_ENTRIES(sdp) ((300 << (sdp)->sd_sb.sb_bsize_shift) >> 12) -extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type, bool rcu); -extern int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type); -extern int gfs2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, - struct posix_acl *acl, int type); +struct posix_acl *gfs2_get_acl(struct inode *inode, int type, bool rcu); +int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type); +int gfs2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, + struct posix_acl *acl, int type); #endif /* __ACL_DOT_H__ */ diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 6b060fc9e260..9611bfceda4b 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -155,7 +155,7 @@ static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); - if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) + if (gfs2_assert_withdraw(sdp, ip->i_gl->gl_state == LM_ST_EXCLUSIVE)) goto out; if (folio_test_checked(folio) || current->journal_info) goto out_ignore; @@ -214,12 +214,12 @@ static int gfs2_write_jdata_batch(struct address_space *mapping, unsigned nrblocks; int i; int ret; - int nr_pages = 0; + size_t size = 0; int nr_folios = folio_batch_count(fbatch); for (i = 0; i < nr_folios; i++) - nr_pages += folio_nr_pages(fbatch->folios[i]); - nrblocks = nr_pages * (PAGE_SIZE >> inode->i_blkbits); + size += folio_size(fbatch->folios[i]); + nrblocks = size >> inode->i_blkbits; ret = gfs2_trans_begin(sdp, nrblocks, nrblocks); if (ret < 0) @@ -403,27 +403,27 @@ static int gfs2_jdata_writepages(struct address_space *mapping, } /** - * stuffed_readpage - Fill in a Linux page with stuffed file data + * stuffed_readpage - Fill in a Linux folio with stuffed file data * @ip: the inode - * @page: the page + * @folio: the folio * * Returns: errno */ -static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) +static int stuffed_readpage(struct gfs2_inode *ip, struct folio *folio) { struct buffer_head *dibh; - u64 dsize = i_size_read(&ip->i_inode); - void *kaddr; + size_t i_size = i_size_read(&ip->i_inode); + void *data; int error; /* * Due to the order of unstuffing files and ->fault(), we can be - * asked for a zero page in the case of a stuffed file being extended, + * asked for a zero folio in the case of a stuffed file being extended, * so we need to supply one here. It doesn't happen often. 
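/*
 * Note (an aside, not part of the patch): the fuse hunk above swaps bare
 * magic numbers for the named constants from include/linux/exportfs.h
 * without changing the encoded handle types, i.e.:
 */
#include <linux/build_bug.h>
#include <linux/exportfs.h>

static_assert(FILEID_INO64_GEN == 0x81);
static_assert(FILEID_INO64_GEN_PARENT == 0x82);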
*/ - if (unlikely(page->index)) { - zero_user(page, 0, PAGE_SIZE); - SetPageUptodate(page); + if (unlikely(folio->index)) { + folio_zero_range(folio, 0, folio_size(folio)); + folio_mark_uptodate(folio); return 0; } @@ -431,13 +431,11 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) if (error) return error; - kaddr = kmap_local_page(page); - memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); - memset(kaddr + dsize, 0, PAGE_SIZE - dsize); - kunmap_local(kaddr); - flush_dcache_page(page); + data = dibh->b_data + sizeof(struct gfs2_dinode); + memcpy_to_folio(folio, 0, data, i_size); + folio_zero_range(folio, i_size, folio_size(folio) - i_size); brelse(dibh); - SetPageUptodate(page); + folio_mark_uptodate(folio); return 0; } @@ -458,7 +456,7 @@ static int gfs2_read_folio(struct file *file, struct folio *folio) (i_blocksize(inode) == PAGE_SIZE && !folio_buffers(folio))) { error = iomap_read_folio(folio, &gfs2_iomap_ops); } else if (gfs2_is_stuffed(ip)) { - error = stuffed_readpage(ip, &folio->page); + error = stuffed_readpage(ip, folio); folio_unlock(folio); } else { error = mpage_read_folio(folio, gfs2_block_map); @@ -479,31 +477,29 @@ static int gfs2_read_folio(struct file *file, struct folio *folio) * */ -int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, - unsigned size) +ssize_t gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, + size_t size) { struct address_space *mapping = ip->i_inode.i_mapping; unsigned long index = *pos >> PAGE_SHIFT; - unsigned offset = *pos & (PAGE_SIZE - 1); - unsigned copied = 0; - unsigned amt; - struct page *page; + size_t copied = 0; do { - page = read_cache_page(mapping, index, gfs2_read_folio, NULL); - if (IS_ERR(page)) { - if (PTR_ERR(page) == -EINTR) + size_t offset, chunk; + struct folio *folio; + + folio = read_cache_folio(mapping, index, gfs2_read_folio, NULL); + if (IS_ERR(folio)) { + if (PTR_ERR(folio) == -EINTR) continue; - return PTR_ERR(page); + return PTR_ERR(folio); } - amt = size - copied; - if (offset + size > PAGE_SIZE) - amt = PAGE_SIZE - offset; - memcpy_from_page(buf + copied, page, offset, amt); - put_page(page); - copied += amt; - index++; - offset = 0; + offset = *pos + copied - folio_pos(folio); + chunk = min(size - copied, folio_size(folio) - offset); + memcpy_from_folio(buf + copied, folio, offset, chunk); + index = folio_next_index(folio); + folio_put(folio); + copied += chunk; } while(copied < size); (*pos) += size; return size; diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h index f08322ef41cf..a10c4334d248 100644 --- a/fs/gfs2/aops.h +++ b/fs/gfs2/aops.h @@ -8,8 +8,8 @@ #include "incore.h" -extern void adjust_fs_space(struct inode *inode); -extern void gfs2_trans_add_databufs(struct gfs2_inode *ip, struct folio *folio, - size_t from, size_t len); +void adjust_fs_space(struct inode *inode); +void gfs2_trans_add_databufs(struct gfs2_inode *ip, struct folio *folio, + size_t from, size_t len); #endif /* __AOPS_DOT_H__ */ diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 6eb6f1bd9e34..d9ccfd27e4f1 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -104,7 +104,7 @@ static int __gfs2_unstuff_inode(struct gfs2_inode *ip, struct folio *folio) and write it out to disk */ unsigned int n = 1; - error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL); + error = gfs2_alloc_blocks(ip, &block, &n, 0); if (error) goto out_brelse; if (isdir) { @@ -315,6 +315,12 @@ static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end) } } +static inline struct buffer_head * 
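/*
 * Example (a sketch, not part of the patch): the stuffed_readpage()
 * conversion above reduces to this fill-and-zero-tail folio pattern,
 * replacing the old kmap_local_page()/memset()/flush_dcache_page()
 * sequence:
 */
#include <linux/highmem.h>
#include <linux/pagemap.h>

static void fill_folio_head(struct folio *folio, const void *src, size_t len)
{
	memcpy_to_folio(folio, 0, src, len);
	folio_zero_range(folio, len, folio_size(folio) - len);
	folio_mark_uptodate(folio);
}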
+metapath_dibh(struct metapath *mp) +{ + return mp->mp_bh[0]; +} + static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, unsigned int x, unsigned int h) { @@ -413,13 +419,12 @@ static void release_metapath(struct metapath *mp) * gfs2_extent_length - Returns length of an extent of blocks * @bh: The metadata block * @ptr: Current position in @bh - * @limit: Max extent length to return * @eob: Set to 1 if we hit "end of block" * * Returns: The length of the extent (minimum of one block) */ -static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, size_t limit, int *eob) +static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, int *eob) { const __be64 *end = (__be64 *)(bh->b_data + bh->b_size); const __be64 *first = ptr; @@ -658,7 +663,7 @@ static int __gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap, { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); - struct buffer_head *dibh = mp->mp_bh[0]; + struct buffer_head *dibh = metapath_dibh(mp); u64 bn; unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0; size_t dblks = iomap->length >> inode->i_blkbits; @@ -700,7 +705,7 @@ static int __gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap, i = mp->mp_aheight; do { n = blks - alloced; - ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL); + ret = gfs2_alloc_blocks(ip, &bn, &n, 0); if (ret) goto out; alloced += n; @@ -911,7 +916,7 @@ unstuff: goto do_alloc; bh = mp->mp_bh[ip->i_height - 1]; - len = gfs2_extent_length(bh, ptr, len, &eob); + len = gfs2_extent_length(bh, ptr, &eob); iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits; iomap->length = len << inode->i_blkbits; diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h index e5b7d17131ed..4e8b1e8ebdf3 100644 --- a/fs/gfs2/bmap.h +++ b/fs/gfs2/bmap.h @@ -46,24 +46,24 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip, extern const struct iomap_ops gfs2_iomap_ops; extern const struct iomap_writeback_ops gfs2_writeback_ops; -extern int gfs2_unstuff_dinode(struct gfs2_inode *ip); -extern int gfs2_block_map(struct inode *inode, sector_t lblock, - struct buffer_head *bh, int create); -extern int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, - struct iomap *iomap); -extern int gfs2_iomap_alloc(struct inode *inode, loff_t pos, loff_t length, - struct iomap *iomap); -extern int gfs2_get_extent(struct inode *inode, u64 lblock, u64 *dblock, - unsigned int *extlen); -extern int gfs2_alloc_extent(struct inode *inode, u64 lblock, u64 *dblock, - unsigned *extlen, bool *new); -extern int gfs2_setattr_size(struct inode *inode, u64 size); -extern int gfs2_truncatei_resume(struct gfs2_inode *ip); -extern int gfs2_file_dealloc(struct gfs2_inode *ip); -extern int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, - unsigned int len); -extern int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd); -extern void gfs2_free_journal_extents(struct gfs2_jdesc *jd); -extern int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length); +int gfs2_unstuff_dinode(struct gfs2_inode *ip); +int gfs2_block_map(struct inode *inode, sector_t lblock, + struct buffer_head *bh, int create); +int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, + struct iomap *iomap); +int gfs2_iomap_alloc(struct inode *inode, loff_t pos, loff_t length, + struct iomap *iomap); +int gfs2_get_extent(struct inode *inode, u64 lblock, u64 *dblock, + unsigned int *extlen); +int gfs2_alloc_extent(struct inode 
*inode, u64 lblock, u64 *dblock, + unsigned *extlen, bool *new); +int gfs2_setattr_size(struct inode *inode, u64 size); +int gfs2_truncatei_resume(struct gfs2_inode *ip); +int gfs2_file_dealloc(struct gfs2_inode *ip); +int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, + unsigned int len); +int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd); +void gfs2_free_journal_extents(struct gfs2_jdesc *jd); +int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length); #endif /* __BMAP_DOT_H__ */ diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 61ddd03ea111..560e4624c09f 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -868,7 +868,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, struct gfs2_dirent *dent; struct timespec64 tv = current_time(inode); - error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL); + error = gfs2_alloc_blocks(ip, &bn, &n, 0); if (error) return NULL; bh = gfs2_meta_new(ip->i_gl, bn); diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index 5b76480c17c9..25a857c78b53 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h @@ -23,32 +23,32 @@ struct gfs2_diradd { int save_loc; }; -extern struct inode *gfs2_dir_search(struct inode *dir, - const struct qstr *filename, - bool fail_on_exist); -extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, - const struct gfs2_inode *ip); -extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, - const struct gfs2_inode *ip, struct gfs2_diradd *da); +struct inode *gfs2_dir_search(struct inode *dir, + const struct qstr *filename, + bool fail_on_exist); +int gfs2_dir_check(struct inode *dir, const struct qstr *filename, + const struct gfs2_inode *ip); +int gfs2_dir_add(struct inode *inode, const struct qstr *filename, + const struct gfs2_inode *ip, struct gfs2_diradd *da); static inline void gfs2_dir_no_add(struct gfs2_diradd *da) { brelse(da->bh); da->bh = NULL; } -extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry); -extern int gfs2_dir_read(struct inode *inode, struct dir_context *ctx, - struct file_ra_state *f_ra); -extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, - const struct gfs2_inode *nip, unsigned int new_type); +int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry); +int gfs2_dir_read(struct inode *inode, struct dir_context *ctx, + struct file_ra_state *f_ra); +int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, + const struct gfs2_inode *nip, unsigned int new_type); -extern int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); +int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); -extern int gfs2_diradd_alloc_required(struct inode *dir, - const struct qstr *filename, - struct gfs2_diradd *da); -extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, - struct buffer_head **bhp); -extern void gfs2_dir_hash_inval(struct gfs2_inode *ip); +int gfs2_diradd_alloc_required(struct inode *dir, + const struct qstr *filename, + struct gfs2_diradd *da); +int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, + struct buffer_head **bhp); +void gfs2_dir_hash_inval(struct gfs2_inode *ip); static inline u32 gfs2_disk_hash(const char *data, int len) { diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index f2700477a300..4b66efc1a82a 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -418,7 +418,7 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) struct inode *inode = file_inode(vmf->vma->vm_file); struct gfs2_inode *ip = GFS2_I(inode); 
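/*
 * Note (an aside, not part of the patch): the gfs2 header hunks here and
 * below only drop the redundant 'extern' keyword. 'extern' is implicit
 * on function declarations, so both of these declare the same symbol:
 */
extern int gfs2_logd(void *data);	/* old style */
int gfs2_logd(void *data);		/* new style, identical meaning */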
struct gfs2_sbd *sdp = GFS2_SB(inode); - struct gfs2_alloc_parms ap = { .aflags = 0, }; + struct gfs2_alloc_parms ap = {}; u64 offset = page_offset(page); unsigned int data_blocks, ind_blocks, rblocks; vm_fault_t ret = VM_FAULT_LOCKED; @@ -1120,14 +1120,16 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (ret) goto out_unlock; - ret = file_update_time(file); - if (ret) - goto out_unlock; - if (iocb->ki_flags & IOCB_DIRECT) { struct address_space *mapping = file->f_mapping; ssize_t buffered, ret2; + /* + * Note that under direct I/O, we don't allow and inode + * timestamp updates, so we're not calling file_update_time() + * here. + */ + ret = gfs2_file_direct_write(iocb, from, &gh); if (ret < 0 || !iov_iter_count(from)) goto out_unlock; @@ -1154,6 +1156,10 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (!ret || ret2 > 0) ret += ret2; } else { + ret = file_update_time(file); + if (ret) + goto out_unlock; + ret = gfs2_file_buffered_write(iocb, from, &gh); if (likely(ret > 0)) ret = generic_write_sync(iocb, ret); @@ -1245,7 +1251,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t struct inode *inode = file_inode(file); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_alloc_parms ap = { .aflags = 0, }; + struct gfs2_alloc_parms ap = {}; unsigned int data_blocks = 0, ind_blocks = 0, rblocks; loff_t bytes, max_bytes, max_blks; int error; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index d5fa75eac0bf..d6bf1f8c25dc 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1524,7 +1524,6 @@ fail: return; } list_add_tail(&gh->gh_list, insert_pt); - gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list); spin_unlock(&gl->gl_lockref.lock); if (sdp->sd_lockstruct.ls_ops->lm_cancel) sdp->sd_lockstruct.ls_ops->lm_cancel(gl); diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index c8685ca7d2a2..61197598abfd 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -156,21 +156,6 @@ out: return gh; } -static inline int gfs2_glock_is_held_excl(struct gfs2_glock *gl) -{ - return gl->gl_state == LM_ST_EXCLUSIVE; -} - -static inline int gfs2_glock_is_held_dfrd(struct gfs2_glock *gl) -{ - return gl->gl_state == LM_ST_DEFERRED; -} - -static inline int gfs2_glock_is_held_shrd(struct gfs2_glock *gl) -{ - return gl->gl_state == LM_ST_SHARED; -} - static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl) { if (gl->gl_ops->go_flags & GLOF_ASPACE) { @@ -181,40 +166,40 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl) return NULL; } -extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, - const struct gfs2_glock_operations *glops, - int create, struct gfs2_glock **glp); -extern struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl); -extern void gfs2_glock_put(struct gfs2_glock *gl); -extern void gfs2_glock_queue_put(struct gfs2_glock *gl); +int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, + const struct gfs2_glock_operations *glops, + int create, struct gfs2_glock **glp); +struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl); +void gfs2_glock_put(struct gfs2_glock *gl); +void gfs2_glock_queue_put(struct gfs2_glock *gl); -extern void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, - u16 flags, struct gfs2_holder *gh, - unsigned long ip); +void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, + u16 flags, struct gfs2_holder *gh, + unsigned long ip); static inline void 
gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags, struct gfs2_holder *gh) { __gfs2_holder_init(gl, state, flags, gh, _RET_IP_); } -extern void gfs2_holder_reinit(unsigned int state, u16 flags, - struct gfs2_holder *gh); -extern void gfs2_holder_uninit(struct gfs2_holder *gh); -extern int gfs2_glock_nq(struct gfs2_holder *gh); -extern int gfs2_glock_poll(struct gfs2_holder *gh); -extern int gfs2_instantiate(struct gfs2_holder *gh); -extern int gfs2_glock_holder_ready(struct gfs2_holder *gh); -extern int gfs2_glock_wait(struct gfs2_holder *gh); -extern int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs); -extern void gfs2_glock_dq(struct gfs2_holder *gh); -extern void gfs2_glock_dq_wait(struct gfs2_holder *gh); -extern void gfs2_glock_dq_uninit(struct gfs2_holder *gh); -extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, - const struct gfs2_glock_operations *glops, - unsigned int state, u16 flags, - struct gfs2_holder *gh); -extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); -extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); -extern void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, +void gfs2_holder_reinit(unsigned int state, u16 flags, + struct gfs2_holder *gh); +void gfs2_holder_uninit(struct gfs2_holder *gh); +int gfs2_glock_nq(struct gfs2_holder *gh); +int gfs2_glock_poll(struct gfs2_holder *gh); +int gfs2_instantiate(struct gfs2_holder *gh); +int gfs2_glock_holder_ready(struct gfs2_holder *gh); +int gfs2_glock_wait(struct gfs2_holder *gh); +int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs); +void gfs2_glock_dq(struct gfs2_holder *gh); +void gfs2_glock_dq_wait(struct gfs2_holder *gh); +void gfs2_glock_dq_uninit(struct gfs2_holder *gh); +int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, + const struct gfs2_glock_operations *glops, + unsigned int state, u16 flags, + struct gfs2_holder *gh); +int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); +void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); +void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid); #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { \ gfs2_dump_glock(NULL, gl, true); \ @@ -228,7 +213,7 @@ extern void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, gfs2_assert_withdraw((gl)->gl_name.ln_sbd, (x)); } } \ while (0) -extern __printf(2, 3) +__printf(2, 3) void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); /** @@ -256,27 +241,27 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl, return error; } -extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); -extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret); -extern bool gfs2_queue_try_to_evict(struct gfs2_glock *gl); -extern void gfs2_cancel_delete_work(struct gfs2_glock *gl); -extern void gfs2_flush_delete_work(struct gfs2_sbd *sdp); -extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); -extern void gfs2_gl_dq_holders(struct gfs2_sbd *sdp); -extern void gfs2_glock_thaw(struct gfs2_sbd *sdp); -extern void gfs2_glock_add_to_lru(struct gfs2_glock *gl); -extern void gfs2_glock_free(struct gfs2_glock *gl); +void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); +void gfs2_glock_complete(struct gfs2_glock *gl, int ret); +bool gfs2_queue_try_to_evict(struct gfs2_glock *gl); +void gfs2_cancel_delete_work(struct gfs2_glock *gl); +void gfs2_flush_delete_work(struct gfs2_sbd *sdp); +void gfs2_gl_hash_clear(struct gfs2_sbd 
*sdp); +void gfs2_gl_dq_holders(struct gfs2_sbd *sdp); +void gfs2_glock_thaw(struct gfs2_sbd *sdp); +void gfs2_glock_add_to_lru(struct gfs2_glock *gl); +void gfs2_glock_free(struct gfs2_glock *gl); -extern int __init gfs2_glock_init(void); -extern void gfs2_glock_exit(void); +int __init gfs2_glock_init(void); +void gfs2_glock_exit(void); -extern void gfs2_create_debugfs_file(struct gfs2_sbd *sdp); -extern void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); -extern void gfs2_register_debugfs(void); -extern void gfs2_unregister_debugfs(void); +void gfs2_create_debugfs_file(struct gfs2_sbd *sdp); +void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); +void gfs2_register_debugfs(void); +void gfs2_unregister_debugfs(void); -extern void glock_set_object(struct gfs2_glock *gl, void *object); -extern void glock_clear_object(struct gfs2_glock *gl, void *object); +void glock_set_object(struct gfs2_glock *gl, void *object); +void glock_clear_object(struct gfs2_glock *gl, void *object); extern const struct lm_lockops gfs2_dlm_ops; @@ -295,7 +280,7 @@ static inline bool gfs2_holder_queued(struct gfs2_holder *gh) return !list_empty(&gh->gh_list); } -extern void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation); -extern bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation); +void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation); +bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation); #endif /* __GLOCK_DOT_H__ */ diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index e7d334c277a1..b41c78bd2cc0 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -615,18 +615,6 @@ static int freeze_go_xmote_bh(struct gfs2_glock *gl) } /** - * freeze_go_demote_ok - * @gl: the glock - * - * Always returns 0 - */ - -static int freeze_go_demote_ok(const struct gfs2_glock *gl) -{ - return 0; -} - -/** * iopen_go_callback - schedule the dcache entry for the inode to be deleted * @gl: the glock * @remote: true if this came from a different cluster node @@ -745,7 +733,6 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { const struct gfs2_glock_operations gfs2_freeze_glops = { .go_xmote_bh = freeze_go_xmote_bh, - .go_demote_ok = freeze_go_demote_ok, .go_callback = freeze_go_callback, .go_type = LM_TYPE_NONDISK, .go_flags = GLOF_NONDISK, diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h index 695898afcaf1..9341423798df 100644 --- a/fs/gfs2/glops.h +++ b/fs/gfs2/glops.h @@ -22,7 +22,7 @@ extern const struct gfs2_glock_operations gfs2_quota_glops; extern const struct gfs2_glock_operations gfs2_journal_glops; extern const struct gfs2_glock_operations *gfs2_glops_list[]; -extern int gfs2_inode_metasync(struct gfs2_glock *gl); -extern void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync); +int gfs2_inode_metasync(struct gfs2_glock *gl); +void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync); #endif /* __GLOPS_DOT_H__ */ diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index a8c95c5293c6..95a334d64da2 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -863,7 +863,7 @@ static inline void gfs2_sbstats_inc(const struct gfs2_glock *gl, int which) preempt_enable(); } -extern struct gfs2_rgrpd *gfs2_glock2rgrp(struct gfs2_glock *gl); +struct gfs2_rgrpd *gfs2_glock2rgrp(struct gfs2_glock *gl); static inline unsigned gfs2_max_stuffed_size(const struct gfs2_inode *ip) { diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 7fe77bc771e5..1b95db2c3aac 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -266,17 +266,18 @@ fail_iput: } -struct inode 
*gfs2_lookup_simple(struct inode *dip, const char *name) +/** + * gfs2_lookup_meta - Look up an inode in a metadata directory + * @dip: The directory + * @name: The name of the inode + */ +struct inode *gfs2_lookup_meta(struct inode *dip, const char *name) { struct qstr qstr; struct inode *inode; + gfs2_str2qstr(&qstr, name); inode = gfs2_lookupi(dip, &qstr, 1); - /* gfs2_lookupi has inconsistent callers: vfs - * related routines expect NULL for no entry found, - * gfs2_lookup_simple callers expect ENOENT - * and do not check for NULL. - */ if (IS_ERR_OR_NULL(inode)) return inode ? inode : ERR_PTR(-ENOENT); @@ -418,7 +419,7 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks) if (error) goto out_ipreserv; - error = gfs2_alloc_blocks(ip, &ip->i_no_addr, dblocks, 1, &ip->i_generation); + error = gfs2_alloc_blocks(ip, &ip->i_no_addr, dblocks, 1); if (error) goto out_trans_end; @@ -1867,16 +1868,24 @@ out: int gfs2_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { + int may_not_block = mask & MAY_NOT_BLOCK; struct gfs2_inode *ip; struct gfs2_holder i_gh; + struct gfs2_glock *gl; int error; gfs2_holder_mark_uninitialized(&i_gh); ip = GFS2_I(inode); - if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { - if (mask & MAY_NOT_BLOCK) + gl = rcu_dereference_check(ip->i_gl, !may_not_block); + if (unlikely(!gl)) { + /* inode is getting torn down, must be RCU mode */ + WARN_ON_ONCE(!may_not_block); + return -ECHILD; + } + if (gfs2_glock_is_locked_by_me(gl) == NULL) { + if (may_not_block) return -ECHILD; - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); + error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) return error; } @@ -1921,7 +1930,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) kuid_t ouid, nuid; kgid_t ogid, ngid; int error; - struct gfs2_alloc_parms ap; + struct gfs2_alloc_parms ap = {}; ouid = inode->i_uid; ogid = inode->i_gid; @@ -2154,7 +2163,7 @@ static int gfs2_update_time(struct inode *inode, int flags) int error; gh = gfs2_glock_is_locked_by_me(gl); - if (gh && !gfs2_glock_is_held_excl(gl)) { + if (gh && gl->gl_state != LM_ST_EXCLUSIVE) { gfs2_glock_dq(gh); gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, gh); error = gfs2_glock_nq(gh); diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index c8c5814e7295..fd15d1c6b6fb 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -13,9 +13,9 @@ #include "util.h" bool gfs2_release_folio(struct folio *folio, gfp_t gfp_mask); -extern int gfs2_internal_read(struct gfs2_inode *ip, - char *buf, loff_t *pos, unsigned size); -extern void gfs2_set_aops(struct inode *inode); +ssize_t gfs2_internal_read(struct gfs2_inode *ip, + char *buf, loff_t *pos, size_t size); +void gfs2_set_aops(struct inode *inode); static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) { @@ -44,19 +44,17 @@ static inline int gfs2_is_dir(const struct gfs2_inode *ip) static inline void gfs2_set_inode_blocks(struct inode *inode, u64 blocks) { - inode->i_blocks = blocks << - (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT); + inode->i_blocks = blocks << (inode->i_blkbits - 9); } static inline u64 gfs2_get_inode_blocks(const struct inode *inode) { - return inode->i_blocks >> - (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT); + return inode->i_blocks >> (inode->i_blkbits - 9); } static inline void gfs2_add_inode_blocks(struct inode *inode, s64 change) { - change <<= inode->i_blkbits - GFS2_BASIC_BLOCK_SHIFT; + change <<= inode->i_blkbits - 9; 
gfs2_assert(GFS2_SB(inode), (change >= 0 || inode->i_blocks >= -change)); inode->i_blocks += change; } @@ -88,33 +86,33 @@ err: return -EIO; } -extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, - u64 no_addr, u64 no_formal_ino, - unsigned int blktype); -extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, - u64 no_formal_ino, - unsigned int blktype); - -extern int gfs2_inode_refresh(struct gfs2_inode *ip); - -extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, - int is_root); -extern int gfs2_permission(struct mnt_idmap *idmap, - struct inode *inode, int mask); -extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); -extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); -extern int gfs2_open_common(struct inode *inode, struct file *file); -extern loff_t gfs2_seek_data(struct file *file, loff_t offset); -extern loff_t gfs2_seek_hole(struct file *file, loff_t offset); +struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, + u64 no_addr, u64 no_formal_ino, + unsigned int blktype); +struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, + u64 no_formal_ino, + unsigned int blktype); + +int gfs2_inode_refresh(struct gfs2_inode *ip); + +struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, + int is_root); +int gfs2_permission(struct mnt_idmap *idmap, + struct inode *inode, int mask); +struct inode *gfs2_lookup_meta(struct inode *dip, const char *name); +void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); +int gfs2_open_common(struct inode *inode, struct file *file); +loff_t gfs2_seek_data(struct file *file, loff_t offset); +loff_t gfs2_seek_hole(struct file *file, loff_t offset); extern const struct file_operations gfs2_file_fops_nolock; extern const struct file_operations gfs2_dir_fops_nolock; -extern int gfs2_fileattr_get(struct dentry *dentry, struct fileattr *fa); -extern int gfs2_fileattr_set(struct mnt_idmap *idmap, - struct dentry *dentry, struct fileattr *fa); -extern void gfs2_set_inode_flags(struct inode *inode); - +int gfs2_fileattr_get(struct dentry *dentry, struct fileattr *fa); +int gfs2_fileattr_set(struct mnt_idmap *idmap, + struct dentry *dentry, struct fileattr *fa); +void gfs2_set_inode_flags(struct inode *inode); + #ifdef CONFIG_GFS2_FS_LOCKING_DLM extern const struct file_operations gfs2_file_fops; extern const struct file_operations gfs2_dir_fops; diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 653cffcbf869..c27b05099c1e 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -70,29 +70,29 @@ static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip) } } -extern void gfs2_ordered_del_inode(struct gfs2_inode *ip); -extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct); -extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); -extern bool gfs2_log_is_empty(struct gfs2_sbd *sdp); -extern void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes); -extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); -extern bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, - unsigned int *extra_revokes); -extern void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, - unsigned int *extra_revokes); -extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, - u64 seq, u32 tail, u32 lblock, u32 flags, - blk_opf_t op_flags); -extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, - u32 type); 
-extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); -extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc); -extern void log_flush_wait(struct gfs2_sbd *sdp); +void gfs2_ordered_del_inode(struct gfs2_inode *ip); +unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct); +void gfs2_remove_from_ail(struct gfs2_bufdata *bd); +bool gfs2_log_is_empty(struct gfs2_sbd *sdp); +void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes); +void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); +bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + unsigned int *extra_revokes); +void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + unsigned int *extra_revokes); +void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, + u64 seq, u32 tail, u32 lblock, u32 flags, + blk_opf_t op_flags); +void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, + u32 type); +void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); +void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc); +void log_flush_wait(struct gfs2_sbd *sdp); -extern int gfs2_logd(void *data); -extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); -extern void gfs2_glock_remove_revoke(struct gfs2_glock *gl); -extern void gfs2_flush_revokes(struct gfs2_sbd *sdp); -extern void gfs2_ail_drain(struct gfs2_sbd *sdp); +int gfs2_logd(void *data); +void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); +void gfs2_glock_remove_revoke(struct gfs2_glock *gl); +void gfs2_flush_revokes(struct gfs2_sbd *sdp); +void gfs2_ail_drain(struct gfs2_sbd *sdp); #endif /* __LOG_DOT_H__ */ diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index 1412ffba1d44..07890c7b145d 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h @@ -11,16 +11,18 @@ #include "incore.h" extern const struct gfs2_log_operations *gfs2_log_ops[]; -extern void gfs2_log_incr_head(struct gfs2_sbd *sdp); -extern u64 gfs2_log_bmap(struct gfs2_jdesc *jd, unsigned int lbn); -extern void gfs2_log_write(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, - struct page *page, unsigned size, unsigned offset, - u64 blkno); -extern void gfs2_log_submit_bio(struct bio **biop, blk_opf_t opf); -extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); -extern int gfs2_find_jhead(struct gfs2_jdesc *jd, - struct gfs2_log_header_host *head, bool keep_cache); -extern void gfs2_drain_revokes(struct gfs2_sbd *sdp); + +void gfs2_log_incr_head(struct gfs2_sbd *sdp); +u64 gfs2_log_bmap(struct gfs2_jdesc *jd, unsigned int lbn); +void gfs2_log_write(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, + struct page *page, unsigned size, unsigned offset, + u64 blkno); +void gfs2_log_submit_bio(struct bio **biop, blk_opf_t opf); +void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); +int gfs2_find_jhead(struct gfs2_jdesc *jd, + struct gfs2_log_header_host *head, bool keep_cache); +void gfs2_drain_revokes(struct gfs2_sbd *sdp); + static inline unsigned int buf_limit(struct gfs2_sbd *sdp) { return sdp->sd_ldptrs; diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index d0a58cdd433a..831d988c2ceb 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -50,21 +50,21 @@ static inline struct gfs2_sbd *gfs2_mapping2sbd(struct address_space *mapping) return inode->i_sb->s_fs_info; } -extern struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno); -extern int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, - 
int rahead, struct buffer_head **bhp); -extern int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); -extern struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, - int create); +struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno); +int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, + int rahead, struct buffer_head **bhp); +int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); +struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, + int create); enum { REMOVE_JDATA = 0, REMOVE_META = 1, }; -extern void gfs2_remove_from_journal(struct buffer_head *bh, int meta); -extern void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); -extern int gfs2_meta_buffer(struct gfs2_inode *ip, u32 mtype, u64 num, - struct buffer_head **bhp); +void gfs2_remove_from_journal(struct buffer_head *bh, int meta); +void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); +int gfs2_meta_buffer(struct gfs2_inode *ip, u32 mtype, u64 num, + struct buffer_head **bhp); static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip, struct buffer_head **bhp) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index ecf789b7168c..b108c5d26839 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -292,8 +292,7 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent) return error; } - sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - - GFS2_BASIC_BLOCK_SHIFT; + sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - 9; sdp->sd_fsb2bb = BIT(sdp->sd_fsb2bb_shift); sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) / sizeof(u64); @@ -648,7 +647,7 @@ static int init_statfs(struct gfs2_sbd *sdp) struct gfs2_jdesc *jd; struct gfs2_inode *ip; - sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs"); + sdp->sd_statfs_inode = gfs2_lookup_meta(master, "statfs"); if (IS_ERR(sdp->sd_statfs_inode)) { error = PTR_ERR(sdp->sd_statfs_inode); fs_err(sdp, "can't read in statfs inode: %d\n", error); @@ -657,7 +656,7 @@ static int init_statfs(struct gfs2_sbd *sdp) if (sdp->sd_args.ar_spectator) goto out; - pn = gfs2_lookup_simple(master, "per_node"); + pn = gfs2_lookup_meta(master, "per_node"); if (IS_ERR(pn)) { error = PTR_ERR(pn); fs_err(sdp, "can't find per_node directory: %d\n", error); @@ -674,7 +673,7 @@ static int init_statfs(struct gfs2_sbd *sdp) goto free_local; } sprintf(buf, "statfs_change%u", jd->jd_jid); - lsi->si_sc_inode = gfs2_lookup_simple(pn, buf); + lsi->si_sc_inode = gfs2_lookup_meta(pn, buf); if (IS_ERR(lsi->si_sc_inode)) { error = PTR_ERR(lsi->si_sc_inode); fs_err(sdp, "can't find local \"sc\" file#%u: %d\n", @@ -739,7 +738,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) if (undo) goto fail_statfs; - sdp->sd_jindex = gfs2_lookup_simple(master, "jindex"); + sdp->sd_jindex = gfs2_lookup_meta(master, "jindex"); if (IS_ERR(sdp->sd_jindex)) { fs_err(sdp, "can't lookup journal index: %d\n", error); return PTR_ERR(sdp->sd_jindex); @@ -888,7 +887,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) goto fail; /* Read in the resource index inode */ - sdp->sd_rindex = gfs2_lookup_simple(master, "rindex"); + sdp->sd_rindex = gfs2_lookup_meta(master, "rindex"); if (IS_ERR(sdp->sd_rindex)) { error = PTR_ERR(sdp->sd_rindex); fs_err(sdp, "can't get resource index inode: %d\n", error); @@ -897,7 +896,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) sdp->sd_rindex_uptodate = 0; /* Read in the quota inode */ - sdp->sd_quota_inode = gfs2_lookup_simple(master, "quota"); + 
sdp->sd_quota_inode = gfs2_lookup_meta(master, "quota"); if (IS_ERR(sdp->sd_quota_inode)) { error = PTR_ERR(sdp->sd_quota_inode); fs_err(sdp, "can't get quota file inode: %d\n", error); @@ -941,7 +940,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo) if (undo) goto fail_qc_gh; - pn = gfs2_lookup_simple(master, "per_node"); + pn = gfs2_lookup_meta(master, "per_node"); if (IS_ERR(pn)) { error = PTR_ERR(pn); fs_err(sdp, "can't find per_node directory: %d\n", error); @@ -949,7 +948,7 @@ } sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid); - sdp->sd_qc_inode = gfs2_lookup_simple(pn, buf); + sdp->sd_qc_inode = gfs2_lookup_meta(pn, buf); if (IS_ERR(sdp->sd_qc_inode)) { error = PTR_ERR(sdp->sd_qc_inode); fs_err(sdp, "can't find local \"qc\" file: %d\n", error); @@ -1187,10 +1186,9 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) /* Set up the buffer cache and fill in some fake block size values to allow us to read in the on-disk superblock. */ - sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, GFS2_BASIC_BLOCK); + sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, 512); sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits; - sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - - GFS2_BASIC_BLOCK_SHIFT; + sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - 9; sdp->sd_fsb2bb = BIT(sdp->sd_fsb2bb_shift); sdp->sd_tune.gt_logd_secs = sdp->sd_args.ar_commit; @@ -1278,10 +1276,8 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) if (!sb_rdonly(sb)) { error = init_threads(sdp); - if (error) { - gfs2_withdraw_delayed(sdp); + if (error) goto fail_per_node; - } } error = gfs2_freeze_lock_shared(sdp); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 5cbbc1a46a92..95dae7838b4e 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -470,6 +470,17 @@ static int qd_check_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd, (sync_gen && (qd->qd_sync_gen >= *sync_gen))) return 0; + /* + * If qd_change is 0 it means a pending quota change was negated. + * We should not sync it, but we still have a qd reference and slot + * reference taken by gfs2_quota_change -> do_qc that need to be put. + */ + if (!qd->qd_change && test_and_clear_bit(QDF_CHANGE, &qd->qd_flags)) { + slot_put(qd); + qd_put(qd); + return 0; + } + if (!lockref_get_not_dead(&qd->qd_lockref)) return 0; @@ -912,7 +923,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) { struct gfs2_sbd *sdp = (*qda)->qd_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); - struct gfs2_alloc_parms ap = { .aflags = 0, }; + struct gfs2_alloc_parms ap = {}; unsigned int data_blocks, ind_blocks; struct gfs2_holder *ghs, i_gh; unsigned int qx, x; @@ -1086,8 +1097,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) u32 x; int error; - if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON && - sdp->sd_args.ar_quota != GFS2_QUOTA_QUIET) + if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return 0; error = gfs2_quota_hold(ip, uid, gid); @@ -1194,17 +1204,16 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) #define MAX_LINE 256 -static int print_message(struct gfs2_quota_data *qd, char *type) +static void print_message(struct gfs2_quota_data *qd, char *type) { struct gfs2_sbd *sdp = qd->qd_sbd; - if (sdp->sd_args.ar_quota != GFS2_QUOTA_QUIET) + if (sdp->sd_args.ar_quota != GFS2_QUOTA_QUIET) { fs_info(sdp, "quota %s for %s %u\n", type, (qd->qd_id.type == USRQUOTA) ?
"user" : "group", from_kqid(&init_user_ns, qd->qd_id)); - - return 0; + } } /** @@ -1274,7 +1283,8 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, * HZ)) { quota_send_warning(qd->qd_id, sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN); - error = print_message(qd, "warning"); + print_message(qd, "warning"); + error = 0; qd->qd_last_warn = jiffies; } } @@ -1288,8 +1298,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, u32 x; struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - if ((sdp->sd_args.ar_quota != GFS2_QUOTA_ON && - sdp->sd_args.ar_quota != GFS2_QUOTA_QUIET) || + if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF || gfs2_assert_warn(sdp, change)) return; if (ip->i_diskflags & GFS2_DIF_SYSTEM) @@ -1746,7 +1755,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid, if (gfs2_is_stuffed(ip)) alloc_required = 1; if (alloc_required) { - struct gfs2_alloc_parms ap = { .aflags = 0, }; + struct gfs2_alloc_parms ap = {}; gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), &data_blocks, &ind_blocks); blocks = 1 + data_blocks + ind_blocks; diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 36f54b426b0c..f462d9cb3087 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -15,27 +15,27 @@ struct gfs2_sbd; #define NO_UID_QUOTA_CHANGE INVALID_UID #define NO_GID_QUOTA_CHANGE INVALID_GID -extern int gfs2_qa_get(struct gfs2_inode *ip); -extern void gfs2_qa_put(struct gfs2_inode *ip); -extern int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); -extern void gfs2_quota_unhold(struct gfs2_inode *ip); +int gfs2_qa_get(struct gfs2_inode *ip); +void gfs2_qa_put(struct gfs2_inode *ip); +int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); +void gfs2_quota_unhold(struct gfs2_inode *ip); -extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); -extern void gfs2_quota_unlock(struct gfs2_inode *ip); +int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); +void gfs2_quota_unlock(struct gfs2_inode *ip); -extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, - struct gfs2_alloc_parms *ap); -extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, - kuid_t uid, kgid_t gid); +int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, + struct gfs2_alloc_parms *ap); +void gfs2_quota_change(struct gfs2_inode *ip, s64 change, + kuid_t uid, kgid_t gid); -extern int gfs2_quota_sync(struct super_block *sb, int type); -extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid); +int gfs2_quota_sync(struct super_block *sb, int type); +int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid); -extern int gfs2_quota_init(struct gfs2_sbd *sdp); -extern void gfs2_quota_cleanup(struct gfs2_sbd *sdp); -extern int gfs2_quotad(void *data); +int gfs2_quota_init(struct gfs2_sbd *sdp); +void gfs2_quota_cleanup(struct gfs2_sbd *sdp); +int gfs2_quotad(void *data); -extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp); +void gfs2_wake_up_statfs(struct gfs2_sbd *sdp); static inline int gfs2_quota_lock_check(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) @@ -50,8 +50,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip, ret = gfs2_quota_lock(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); if (ret) return ret; - if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON && - sdp->sd_args.ar_quota != GFS2_QUOTA_QUIET) + if (sdp->sd_args.ar_quota == GFS2_QUOTA_ACCOUNT) return 0; ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid, ap); if (ret) @@ -63,6 +62,7 @@ extern const struct 
quotactl_ops gfs2_quotactl_ops; int __init gfs2_qd_shrinker_init(void); void gfs2_qd_shrinker_exit(void); extern struct list_lru gfs2_qd_lru; -extern void __init gfs2_quota_hash_init(void); + +void __init gfs2_quota_hash_init(void); #endif /* __QUOTA_DOT_H__ */ diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h index 7a0c9d0b7503..6a0fd42e1120 100644 --- a/fs/gfs2/recovery.h +++ b/fs/gfs2/recovery.h @@ -17,18 +17,18 @@ static inline void gfs2_replay_incr_blk(struct gfs2_jdesc *jd, u32 *blk) *blk = 0; } -extern int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, +int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, struct buffer_head **bh); -extern int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); -extern int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); -extern void gfs2_revoke_clean(struct gfs2_jdesc *jd); +int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); +int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); +void gfs2_revoke_clean(struct gfs2_jdesc *jd); -extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait); -extern void gfs2_recover_func(struct work_struct *work); -extern int __get_log_header(struct gfs2_sbd *sdp, - const struct gfs2_log_header *lh, unsigned int blkno, - struct gfs2_log_header_host *head); +int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait); +void gfs2_recover_func(struct work_struct *work); +int __get_log_header(struct gfs2_sbd *sdp, + const struct gfs2_log_header *lh, unsigned int blkno, + struct gfs2_log_header_host *head); #endif /* __RECOVERY_DOT_H__ */ diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 9308190895c8..c2060203b98a 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -2411,13 +2411,12 @@ static void gfs2_set_alloc_start(struct gfs2_rbm *rbm, * @bn: Used to return the starting block number * @nblocks: requested number of blocks/extent length (value/result) * @dinode: 1 if we're allocating a dinode block, else 0 - * @generation: the generation number of the inode * * Returns: 0 or error */ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, - bool dinode, u64 *generation) + bool dinode) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; @@ -2477,10 +2476,13 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, rbm.rgd->rd_free -= *nblocks; spin_unlock(&rbm.rgd->rd_rsspin); if (dinode) { + u64 generation; + rbm.rgd->rd_dinodes++; - *generation = rbm.rgd->rd_igeneration++; - if (*generation == 0) - *generation = rbm.rgd->rd_igeneration++; + generation = rbm.rgd->rd_igeneration++; + if (generation == 0) + generation = rbm.rgd->rd_igeneration++; + ip->i_generation = generation; } gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh); diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 00b30cf893af..8d20e99385db 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -22,38 +22,38 @@ struct gfs2_rgrpd; struct gfs2_sbd; struct gfs2_holder; -extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); +void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); -extern struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact); -extern struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp); -extern struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); +struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact); +struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp); +struct 
gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); -extern void gfs2_clear_rgrpd(struct gfs2_sbd *sdp); -extern int gfs2_rindex_update(struct gfs2_sbd *sdp); -extern void gfs2_free_clones(struct gfs2_rgrpd *rgd); -extern int gfs2_rgrp_go_instantiate(struct gfs2_glock *gl); -extern void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd); +void gfs2_clear_rgrpd(struct gfs2_sbd *sdp); +int gfs2_rindex_update(struct gfs2_sbd *sdp); +void gfs2_free_clones(struct gfs2_rgrpd *rgd); +int gfs2_rgrp_go_instantiate(struct gfs2_glock *gl); +void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd); -extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); +struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); #define GFS2_AF_ORLOV 1 -extern int gfs2_inplace_reserve(struct gfs2_inode *ip, - struct gfs2_alloc_parms *ap); -extern void gfs2_inplace_release(struct gfs2_inode *ip); - -extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, - bool dinode, u64 *generation); - -extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); -extern void gfs2_rs_delete(struct gfs2_inode *ip); -extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, - u64 bstart, u32 blen, int meta); -extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, - u64 bstart, u32 blen); -extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); -extern void gfs2_unlink_di(struct inode *inode); -extern int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, - unsigned int type); +int gfs2_inplace_reserve(struct gfs2_inode *ip, + struct gfs2_alloc_parms *ap); +void gfs2_inplace_release(struct gfs2_inode *ip); + +int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, + bool dinode); + +void gfs2_rs_deltree(struct gfs2_blkreserv *rs); +void gfs2_rs_delete(struct gfs2_inode *ip); +void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, + u64 bstart, u32 blen, int meta); +void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, + u64 bstart, u32 blen); +void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); +void gfs2_unlink_di(struct inode *inode); +int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, + unsigned int type); struct gfs2_rgrp_list { unsigned int rl_rgrps; @@ -62,18 +62,19 @@ struct gfs2_rgrp_list { struct gfs2_holder *rl_ghs; }; -extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, - u64 block); -extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, - unsigned int state, u16 flags); -extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); -extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); -extern void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, - const char *fs_id_buf); -extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, - struct buffer_head *bh, - const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); -extern int gfs2_fitrim(struct file *filp, void __user *argp); +void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, + u64 block); +void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, + unsigned int state, u16 flags); +void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); +u64 gfs2_ri_total(struct gfs2_sbd *sdp); +void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, + const char *fs_id_buf); +int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, + struct buffer_head *bh, + const struct gfs2_bitmap *bi, unsigned minlen, + u64 *ptrimmed); +int gfs2_fitrim(struct file *filp, void __user *argp); 
/* This is how to tell if a reservation is in the rgrp tree: */ static inline bool gfs2_rs_active(const struct gfs2_blkreserv *rs) @@ -88,9 +89,9 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) return first <= block && block < last; } -extern void check_and_update_goal(struct gfs2_inode *ip); +void check_and_update_goal(struct gfs2_inode *ip); -extern void rgrp_lock_local(struct gfs2_rgrpd *rgd); -extern void rgrp_unlock_local(struct gfs2_rgrpd *rgd); +void rgrp_lock_local(struct gfs2_rgrpd *rgd); +void rgrp_unlock_local(struct gfs2_rgrpd *rgd); #endif /* __RGRP_DOT_H__ */ diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 52a878fa7139..d21c04a22d73 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -602,13 +602,15 @@ restart: } spin_unlock(&sdp->sd_jindex_spin); - if (!sb_rdonly(sb)) { + if (!sb_rdonly(sb)) gfs2_make_fs_ro(sdp); - } - if (gfs2_withdrawn(sdp)) { - gfs2_destroy_threads(sdp); + else { + if (gfs2_withdrawn(sdp)) + gfs2_destroy_threads(sdp); + gfs2_quota_cleanup(sdp); } + WARN_ON(gfs2_withdrawing(sdp)); /* At this point, we're through modifying the disk */ @@ -1006,6 +1008,7 @@ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = sc.sc_dinodes + sc.sc_free; buf->f_ffree = sc.sc_free; buf->f_namelen = GFS2_FNAMESIZE; + buf->f_fsid = uuid_to_fsid(sb->s_uuid.b); return 0; } @@ -1299,18 +1302,8 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode) * As a last resort, if another node keeps holding the iopen glock * without showing any activity on the inode glock, we will eventually * time out and fail the iopen glock upgrade. - * - * Note that we're passing the LM_FLAG_TRY_1CB flag to the first - * locking request as an optimization to notify lock holders as soon as - * possible. Without that flag, they'd be notified implicitly by the - * second locking request. 
*/ - gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, gh); - error = gfs2_glock_nq(gh); - if (error != GLR_TRYFAILED) - return !error; - gfs2_holder_reinit(LM_ST_EXCLUSIVE, GL_ASYNC | GL_NOCACHE, gh); error = gfs2_glock_nq(gh); if (error) @@ -1550,7 +1543,7 @@ out: wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE); gfs2_glock_add_to_lru(ip->i_gl); gfs2_glock_put_eventually(ip->i_gl); - ip->i_gl = NULL; + rcu_assign_pointer(ip->i_gl, NULL); } } @@ -1576,7 +1569,7 @@ static void gfs2_free_inode(struct inode *inode) kmem_cache_free(gfs2_inode_cachep, GFS2_I(inode)); } -extern void free_local_statfs_inodes(struct gfs2_sbd *sdp) +void free_local_statfs_inodes(struct gfs2_sbd *sdp) { struct local_statfs_inode *lsi, *safe; @@ -1591,8 +1584,8 @@ extern void free_local_statfs_inodes(struct gfs2_sbd *sdp) } } -extern struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp, - unsigned int index) +struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp, + unsigned int index) { struct local_statfs_inode *lsi; diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index b4ddf6244586..b27a774d9580 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -15,7 +15,7 @@ #define GFS2_FS_FORMAT_MIN (1801) #define GFS2_FS_FORMAT_MAX (1802) -extern void gfs2_lm_unmount(struct gfs2_sbd *sdp); +void gfs2_lm_unmount(struct gfs2_sbd *sdp); static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) { @@ -26,33 +26,33 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) return x; } -extern void gfs2_jindex_free(struct gfs2_sbd *sdp); +void gfs2_jindex_free(struct gfs2_sbd *sdp); -extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid); -extern int gfs2_jdesc_check(struct gfs2_jdesc *jd); -extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename, - struct gfs2_inode **ipp); +struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid); +int gfs2_jdesc_check(struct gfs2_jdesc *jd); +int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename, + struct gfs2_inode **ipp); -extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp); -extern void gfs2_make_fs_ro(struct gfs2_sbd *sdp); -extern void gfs2_online_uevent(struct gfs2_sbd *sdp); -extern void gfs2_destroy_threads(struct gfs2_sbd *sdp); -extern int gfs2_statfs_init(struct gfs2_sbd *sdp); -extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, - s64 dinodes); -extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, - const void *buf); -extern void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, - void *buf); -extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh); -extern int gfs2_statfs_sync(struct super_block *sb, int type); -extern void gfs2_freeze_func(struct work_struct *work); -extern void gfs2_thaw_freeze_initiator(struct super_block *sb); +int gfs2_make_fs_rw(struct gfs2_sbd *sdp); +void gfs2_make_fs_ro(struct gfs2_sbd *sdp); +void gfs2_online_uevent(struct gfs2_sbd *sdp); +void gfs2_destroy_threads(struct gfs2_sbd *sdp); +int gfs2_statfs_init(struct gfs2_sbd *sdp); +void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, + s64 dinodes); +void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, + const void *buf); +void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, + void *buf); +void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh); +int gfs2_statfs_sync(struct super_block *sb, int type); +void gfs2_freeze_func(struct work_struct 
*work); +void gfs2_thaw_freeze_initiator(struct super_block *sb); -extern void free_local_statfs_inodes(struct gfs2_sbd *sdp); -extern struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp, - unsigned int index); -extern void free_sbd(struct gfs2_sbd *sdp); +void free_local_statfs_inodes(struct gfs2_sbd *sdp); +struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp, + unsigned int index); +void free_sbd(struct gfs2_sbd *sdp); extern struct file_system_type gfs2_fs_type; extern struct file_system_type gfs2meta_fs_type; diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index c76ad9a4c75a..f8ce5302280d 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -34,17 +34,17 @@ static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned return rgd->rd_length; } -extern int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, - unsigned int blocks, unsigned int revokes, - unsigned long ip); -extern int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, - unsigned int revokes); - -extern void gfs2_trans_end(struct gfs2_sbd *sdp); -extern void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh); -extern void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh); -extern void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); -extern void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len); -extern void gfs2_trans_free(struct gfs2_sbd *sdp, struct gfs2_trans *tr); +int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, + unsigned int blocks, unsigned int revokes, + unsigned long ip); +int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, + unsigned int revokes); + +void gfs2_trans_end(struct gfs2_sbd *sdp); +void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh); +void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh); +void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); +void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len); +void gfs2_trans_free(struct gfs2_sbd *sdp, struct gfs2_trans *tr); #endif /* __TRANS_DOT_H__ */ diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index cdb839529175..11c9d59b6889 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -147,10 +147,10 @@ static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type, int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file, unsigned int line); -extern int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, - bool verbose); -extern int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp); -extern void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh); +int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, + bool verbose); +int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp); +void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh); #define gfs2_io_error(sdp) \ gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__) diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 79d5c5559512..8c96ba6230d1 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -639,7 +639,7 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp) u64 block; int error; - error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL); + error = gfs2_alloc_blocks(ip, &block, &n, 0); if (error) return error; gfs2_trans_remove_revoke(sdp, block, 1); @@ -701,7 +701,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea, int mh_size = sizeof(struct gfs2_meta_header); unsigned int n = 1; - 
error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL); + error = gfs2_alloc_blocks(ip, &block, &n, 0); if (error) return error; gfs2_trans_remove_revoke(sdp, block, 1); @@ -1002,7 +1002,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er, } else { u64 blk; unsigned int n = 1; - error = gfs2_alloc_blocks(ip, &blk, &n, 0, NULL); + error = gfs2_alloc_blocks(ip, &blk, &n, 0); if (error) return error; gfs2_trans_remove_revoke(sdp, blk, 1); diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h index 2aed9d7d483d..eb12eb7e37c1 100644 --- a/fs/gfs2/xattr.h +++ b/fs/gfs2/xattr.h @@ -50,14 +50,14 @@ struct gfs2_ea_location { struct gfs2_ea_header *el_prev; }; -extern int __gfs2_xattr_set(struct inode *inode, const char *name, - const void *value, size_t size, - int flags, int type); -extern ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size); -extern int gfs2_ea_dealloc(struct gfs2_inode *ip); +int __gfs2_xattr_set(struct inode *inode, const char *name, + const void *value, size_t size, + int flags, int type); +ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size); +int gfs2_ea_dealloc(struct gfs2_inode *ip); /* Exported to acl.c */ -extern int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **data); +int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **data); #endif /* __EATTR_DOT_H__ */ diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 54b3d489b6a7..f757d4f7ad98 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1179,7 +1179,9 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); struct hstate *h = hstate_inode(d_inode(dentry)); + u64 id = huge_encode_dev(dentry->d_sb->s_dev); + buf->f_fsid = u64_to_fsid(id); buf->f_type = HUGETLBFS_MAGIC; buf->f_bsize = huge_page_size(h); if (sbinfo) { diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 7ea37f49f1e1..f99591a634b4 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -150,6 +150,7 @@ static struct dentry *jffs2_get_parent(struct dentry *child) } static const struct export_operations jffs2_export_ops = { + .encode_fh = generic_encode_ino32_fh, .get_parent = jffs2_get_parent, .fh_to_dentry = jffs2_fh_to_dentry, .fh_to_parent = jffs2_fh_to_parent, diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 966826c394ee..8d8e556bd610 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -896,6 +896,7 @@ static const struct super_operations jfs_super_operations = { }; static const struct export_operations jfs_export_operations = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = jfs_fh_to_dentry, .fh_to_parent = jfs_fh_to_parent, .get_parent = jfs_get_parent, diff --git a/fs/libfs.c b/fs/libfs.c index abe2b5a40ba1..e9440d55073c 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -41,6 +41,9 @@ EXPORT_SYMBOL(simple_getattr); int simple_statfs(struct dentry *dentry, struct kstatfs *buf) { + u64 id = huge_encode_dev(dentry->d_sb->s_dev); + + buf->f_fsid = u64_to_fsid(id); buf->f_type = dentry->d_sb->s_magic; buf->f_bsize = PAGE_SIZE; buf->f_namelen = NAME_MAX; @@ -1310,6 +1313,47 @@ ssize_t simple_attr_write_signed(struct file *file, const char __user *buf, EXPORT_SYMBOL_GPL(simple_attr_write_signed); /** + * generic_encode_ino32_fh - generic export_operations->encode_fh function + * @inode: the object to encode + * @fh: where to store the file handle fragment + * @max_len: maximum length to store there (in 4 byte units) + * @parent: parent directory inode, if 
wanted + * + * This generic encode_fh function assumes that the 32-bit inode number + * is suitable for locating an inode, and that the generation number + * can be used to check that it is still valid. It places them in the + * filehandle fragment where export_decode_fh expects to find them. + */ +int generic_encode_ino32_fh(struct inode *inode, __u32 *fh, int *max_len, + struct inode *parent) +{ + struct fid *fid = (void *)fh; + int len = *max_len; + int type = FILEID_INO32_GEN; + + if (parent && (len < 4)) { + *max_len = 4; + return FILEID_INVALID; + } else if (len < 2) { + *max_len = 2; + return FILEID_INVALID; + } + + len = 2; + fid->i32.ino = inode->i_ino; + fid->i32.gen = inode->i_generation; + if (parent) { + fid->i32.parent_ino = parent->i_ino; + fid->i32.parent_gen = parent->i_generation; + len = 4; + type = FILEID_INO32_GEN_PARENT; + } + *max_len = len; + return type; +} +EXPORT_SYMBOL_GPL(generic_encode_ino32_fh); + +/** * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation * @sb: filesystem to do the file handle conversion on * @fid: file handle to convert diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c index 4905665c47d0..57d1dedf3f8f 100644 --- a/fs/mnt_idmapping.c +++ b/fs/mnt_idmapping.c @@ -256,6 +256,7 @@ struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap) return idmap; } +EXPORT_SYMBOL_GPL(mnt_idmap_get); /** * mnt_idmap_put - put a reference to an idmapping @@ -271,3 +272,4 @@ void mnt_idmap_put(struct mnt_idmap *idmap) kfree(idmap); } } +EXPORT_SYMBOL_GPL(mnt_idmap_put); diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 7df2503cef6c..01ac733a6320 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -125,7 +125,7 @@ config PNFS_BLOCK config PNFS_FLEXFILE_LAYOUT tristate - depends on NFS_V4_1 && NFS_V3 + depends on NFS_V4_1 default NFS_V4 config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index cf7365581031..fa1a14def45c 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -448,6 +448,7 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, delegation->cred = get_cred(cred); delegation->inode = inode; delegation->flags = 1<<NFS_DELEGATION_REFERENCED; + delegation->test_gen = 0; spin_lock_init(&delegation->lock); spin_lock(&clp->cl_lock); @@ -1294,6 +1295,8 @@ static int nfs_server_reap_expired_delegations(struct nfs_server *server, struct inode *inode; const struct cred *cred; nfs4_stateid stateid; + unsigned long gen = ++server->delegation_gen; + restart: rcu_read_lock(); restart_locked: @@ -1303,7 +1306,8 @@ restart_locked: test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) || test_bit(NFS_DELEGATION_TEST_EXPIRED, - &delegation->flags) == 0) + &delegation->flags) == 0 || + delegation->test_gen == gen) continue; inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) @@ -1312,6 +1316,7 @@ restart_locked: cred = get_cred_rcu(delegation->cred); nfs4_stateid_copy(&stateid, &delegation->stateid); spin_unlock(&delegation->lock); + delegation->test_gen = gen; clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags); rcu_read_unlock(); nfs_delegation_test_free_expired(inode, &stateid, cred); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 1c378992b7c0..a6f495d012cf 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -21,6 +21,7 @@ struct nfs_delegation { fmode_t type; unsigned long pagemod_limit; __u64 change_attr; + unsigned long test_gen; unsigned long flags; refcount_t refcount; spinlock_t lock; diff --git a/fs/nfs/dir.c
b/fs/nfs/dir.c index e6a51fd94fea..13dffe4201e6 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2532,7 +2532,7 @@ EXPORT_SYMBOL_GPL(nfs_unlink); int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { - struct page *page; + struct folio *folio; char *kaddr; struct iattr attr; unsigned int pathlen = strlen(symname); @@ -2547,24 +2547,24 @@ int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir, attr.ia_mode = S_IFLNK | S_IRWXUGO; attr.ia_valid = ATTR_MODE; - page = alloc_page(GFP_USER); - if (!page) + folio = folio_alloc(GFP_USER, 0); + if (!folio) return -ENOMEM; - kaddr = page_address(page); + kaddr = folio_address(folio); memcpy(kaddr, symname, pathlen); if (pathlen < PAGE_SIZE) memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen); trace_nfs_symlink_enter(dir, dentry); - error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr); + error = NFS_PROTO(dir)->symlink(dir, dentry, folio, pathlen, &attr); trace_nfs_symlink_exit(dir, dentry, error); if (error != 0) { dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s) error %d\n", dir->i_sb->s_id, dir->i_ino, dentry, symname, error); d_drop(dentry); - __free_page(page); + folio_put(folio); return error; } @@ -2574,18 +2574,13 @@ int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir, * No big deal if we can't add this page to the page cache here. * READLINK will get the missing page from the server if needed. */ - if (!add_to_page_cache_lru(page, d_inode(dentry)->i_mapping, 0, - GFP_KERNEL)) { - SetPageUptodate(page); - unlock_page(page); - /* - * add_to_page_cache_lru() grabs an extra page refcount. - * Drop it here to avoid leaking this page later. - */ - put_page(page); - } else - __free_page(page); + if (filemap_add_folio(d_inode(dentry)->i_mapping, folio, 0, + GFP_KERNEL) == 0) { + folio_mark_uptodate(folio); + folio_unlock(folio); + } + folio_put(folio); return 0; } EXPORT_SYMBOL_GPL(nfs_symlink); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 4bf208a0a8e9..2de66e4e8280 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -543,9 +543,10 @@ out: } static int -nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, +nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct folio *folio, unsigned int len, struct iattr *sattr) { + struct page *page = &folio->page; struct nfs3_createdata *data; struct dentry *d_alias; int status = -ENOMEM; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 827d00e2f094..581698f1b7b2 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -209,6 +209,7 @@ struct nfs4_exception { struct inode *inode; nfs4_stateid *stateid; long timeout; + unsigned short retrans; unsigned char task_is_privileged : 1; unsigned char delay : 1, recovering : 1, @@ -546,6 +547,7 @@ extern unsigned short max_session_slots; extern unsigned short max_session_cb_slots; extern unsigned short send_implementation_id; extern bool recover_lost_locks; +extern short nfs_delay_retrans; #define NFS4_CLIENT_ID_UNIQ_LEN (64) extern char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN]; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a654d7234f51..8a943fffaad5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -585,6 +585,21 @@ wait_on_recovery: return 0; } +/* + * Track the number of NFS4ERR_DELAY related retransmissions and return + * EAGAIN if the 'softerr' mount option is set, and we've exceeded the limit + * set by 'nfs_delay_retrans'. 
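+ * The 'nfs_delay_retrans' module parameter defaults to -1; any negative
+ * value leaves the number of these retries unlimited.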
+ */ +static int nfs4_exception_should_retrans(const struct nfs_server *server, + struct nfs4_exception *exception) +{ + if (server->flags & NFS_MOUNT_SOFTERR && nfs_delay_retrans >= 0) { + if (exception->retrans++ >= (unsigned short)nfs_delay_retrans) + return -EAGAIN; + } + return 0; +} + /* This is the error handling routine for processes that are allowed * to sleep. */ @@ -595,6 +610,11 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ ret = nfs4_do_handle_exception(server, errorcode, exception); if (exception->delay) { + int ret2 = nfs4_exception_should_retrans(server, exception); + if (ret2 < 0) { + exception->retry = 0; + return ret2; + } ret = nfs4_delay(&exception->timeout, exception->interruptible); goto out_retry; @@ -623,6 +643,11 @@ nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server, ret = nfs4_do_handle_exception(server, errorcode, exception); if (exception->delay) { + int ret2 = nfs4_exception_should_retrans(server, exception); + if (ret2 < 0) { + exception->retry = 0; + return ret2; + } rpc_delay(task, nfs4_update_delay(&exception->timeout)); goto out_retry; } @@ -5011,9 +5036,10 @@ static void nfs4_free_createdata(struct nfs4_createdata *data) } static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, - struct page *page, unsigned int len, struct iattr *sattr, + struct folio *folio, unsigned int len, struct iattr *sattr, struct nfs4_label *label) { + struct page *page = &folio->page; struct nfs4_createdata *data; int status = -ENAMETOOLONG; @@ -5038,7 +5064,7 @@ out: } static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, - struct page *page, unsigned int len, struct iattr *sattr) + struct folio *folio, unsigned int len, struct iattr *sattr) { struct nfs4_exception exception = { .interruptible = true, @@ -5049,7 +5075,7 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, label = nfs4_label_init_security(dir, dentry, sattr, &l); do { - err = _nfs4_proc_symlink(dir, dentry, page, len, sattr, label); + err = _nfs4_proc_symlink(dir, dentry, folio, len, sattr, label); trace_nfs4_symlink(dir, &dentry->d_name, err); err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); @@ -5622,7 +5648,7 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0); - nfs4_state_protect_write(server->nfs_client, clnt, msg, hdr); + nfs4_state_protect_write(hdr->ds_clp ? hdr->ds_clp : server->nfs_client, clnt, msg, hdr); } static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) @@ -5663,7 +5689,8 @@ static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess data->res.server = server; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0); - nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_COMMIT, clnt, msg); + nfs4_state_protect(data->ds_clp ? data->ds_clp : server->nfs_client, + NFS_SP4_MACH_CRED_COMMIT, clnt, msg); } static int _nfs4_proc_commit(struct file *dst, struct nfs_commitargs *args, @@ -8934,6 +8961,7 @@ void nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt, sp4_how = (adata->clp->cl_sp4_flags == 0 ? SP4_NONE : SP4_MACH_CRED); +try_again: /* Test connection for session trunking. 
Async exchange_id call */ task = nfs4_run_exchange_id(adata->clp, adata->cred, sp4_how, xprt); if (IS_ERR(task)) @@ -8946,11 +8974,15 @@ void nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt, if (status == 0) rpc_clnt_xprt_switch_add_xprt(clnt, xprt); - else if (rpc_clnt_xprt_switch_has_addr(clnt, + else if (status != -NFS4ERR_DELAY && rpc_clnt_xprt_switch_has_addr(clnt, (struct sockaddr *)&xprt->addr)) rpc_clnt_xprt_switch_remove_xprt(clnt, xprt); rpc_put_task(task); + if (status == -NFS4ERR_DELAY) { + ssleep(1); + goto try_again; + } } EXPORT_SYMBOL_GPL(nfs4_test_session_trunk); @@ -9621,6 +9653,9 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, nfs4_sequence_free_slot(&lgp->res.seq_res); + exception->state = NULL; + exception->stateid = NULL; + switch (nfs4err) { case 0: goto out; @@ -9716,7 +9751,8 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { }; struct pnfs_layout_segment * -nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) +nfs4_proc_layoutget(struct nfs4_layoutget *lgp, + struct nfs4_exception *exception) { struct inode *inode = lgp->args.inode; struct nfs_server *server = NFS_SERVER(inode); @@ -9736,13 +9772,10 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) RPC_TASK_MOVEABLE, }; struct pnfs_layout_segment *lseg = NULL; - struct nfs4_exception exception = { - .inode = inode, - .timeout = *timeout, - }; int status = 0; nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0); + exception->retry = 0; task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) @@ -9753,11 +9786,12 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) goto out; if (task->tk_status < 0) { - status = nfs4_layoutget_handle_exception(task, lgp, &exception); - *timeout = exception.timeout; + exception->retry = 1; + status = nfs4_layoutget_handle_exception(task, lgp, exception); } else if (lgp->res.layoutp->len == 0) { + exception->retry = 1; status = -EAGAIN; - *timeout = nfs4_update_delay(&exception.timeout); + nfs4_update_delay(&exception->timeout); } else lseg = pnfs_layout_process(lgp); out: diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 84343aefbbd6..21a365357629 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1980,7 +1980,9 @@ pnfs_update_layout(struct inode *ino, struct pnfs_layout_segment *lseg = NULL; struct nfs4_layoutget *lgp; nfs4_stateid stateid; - long timeout = 0; + struct nfs4_exception exception = { + .inode = ino, + }; unsigned long giveup = jiffies + (clp->cl_lease_time << 1); bool first; @@ -2144,7 +2146,7 @@ lookup_again: lgp->lo = lo; pnfs_get_layout_hdr(lo); - lseg = nfs4_proc_layoutget(lgp, &timeout); + lseg = nfs4_proc_layoutget(lgp, &exception); trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); nfs_layoutget_end(lo); @@ -2171,6 +2173,8 @@ lookup_again: goto out_put_layout_hdr; } if (lseg) { + if (!exception.retry) + goto out_put_layout_hdr; if (first) pnfs_clear_first_layoutget(lo); trace_pnfs_update_layout(ino, pos, count, diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index d886c8226d8f..db57a85500ee 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -35,6 +35,7 @@ #include <linux/nfs_page.h> #include <linux/workqueue.h> +struct nfs4_exception; struct nfs4_opendata; enum { @@ -245,7 +246,9 @@ extern size_t max_response_pages(struct nfs_server *server); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev, const struct cred *cred); -extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget 
*lgp, long *timeout); +extern struct pnfs_layout_segment * +nfs4_proc_layoutget(struct nfs4_layoutget *lgp, + struct nfs4_exception *exception); extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync); /* pnfs.c */ diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index e3570c656b0f..ad3a321ae997 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -396,9 +396,10 @@ nfs_proc_link(struct inode *inode, struct inode *dir, const struct qstr *name) } static int -nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, +nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct folio *folio, unsigned int len, struct iattr *sattr) { + struct page *page = &folio->page; struct nfs_fh *fh; struct nfs_fattr *fattr; struct nfs_symlinkargs arg = { diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2667ab753d42..075b31c93f87 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1371,6 +1371,7 @@ unsigned short max_session_cb_slots = NFS4_DEF_CB_SLOT_TABLE_SIZE; unsigned short send_implementation_id = 1; char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN] = ""; bool recover_lost_locks = false; +short nfs_delay_retrans = -1; EXPORT_SYMBOL_GPL(nfs_callback_nr_threads); EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport); @@ -1381,6 +1382,7 @@ EXPORT_SYMBOL_GPL(max_session_cb_slots); EXPORT_SYMBOL_GPL(send_implementation_id); EXPORT_SYMBOL_GPL(nfs4_client_id_uniquifier); EXPORT_SYMBOL_GPL(recover_lost_locks); +EXPORT_SYMBOL_GPL(nfs_delay_retrans); #define NFS_CALLBACK_MAXPORTNR (65535U) @@ -1429,5 +1431,9 @@ MODULE_PARM_DESC(recover_lost_locks, "If the server reports that a lock might be lost, " "try to recover it risking data corruption."); - +module_param_named(delay_retrans, nfs_delay_retrans, short, 0644); +MODULE_PARM_DESC(delay_retrans, + "Unless negative, specifies the number of times the NFSv4 " + "client retries a request before returning an EAGAIN error, " + "after a reply of NFS4ERR_DELAY from the server."); #endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9d82d50ce0b1..b664caea8b4e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -739,6 +739,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) &pgio); pgio.pg_error = 0; nfs_pageio_complete(&pgio); + if (err == -EAGAIN && mntflags & NFS_MOUNT_SOFTERR) + break; } while (err < 0 && !nfs_error_is_fatal(err)); nfs_io_completion_put(ioc); diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index b7da17e53007..7b641095a665 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -426,8 +426,7 @@ static int check_export(struct path *path, int *flags, unsigned char *uuid) return -EINVAL; } - if (!inode->i_sb->s_export_op || - !inode->i_sb->s_export_op->fh_to_dentry) { + if (!exportfs_can_decode_fh(inode->i_sb->s_export_op)) { dprintk("exp_export: export of invalid fs type.\n"); return -EINVAL; } diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 45aecdc302f4..4d765c72496f 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1595,7 +1595,7 @@ static int fanotify_test_fid(struct dentry *dentry, unsigned int flags) * file handles so user can use name_to_handle_at() to compare fids * reported with events to the file handle of watched objects. 
*/ - if (!nop) + if (!exportfs_can_encode_fid(nop)) return -EOPNOTSUPP; /* @@ -1603,7 +1603,7 @@ * supports decoding file handles, so user has a way to map back the * reported fids to filesystem objects. */ - if (mark_type != FAN_MARK_INODE && !nop->fh_to_dentry) + if (mark_type != FAN_MARK_INODE && !exportfs_can_decode_fh(nop)) return -EOPNOTSUPP; return 0; } diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index ab44f2db533b..d7498ddc4a72 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -384,6 +384,7 @@ static struct dentry *ntfs_fh_to_parent(struct super_block *sb, struct fid *fid, * and due to using iget() whereas NTFS needs ntfs_iget(). */ const struct export_operations ntfs_export_ops = { + .encode_fh = generic_encode_ino32_fh, .get_parent = ntfs_get_parent, /* Find the parent of a given directory. */ .fh_to_dentry = ntfs_fh_to_dentry, diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index f763e3256ccc..9153dffde950 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -811,6 +811,7 @@ static int ntfs_nfs_commit_metadata(struct inode *inode) } static const struct export_operations ntfs_export_ops = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = ntfs_fh_to_dentry, .fh_to_parent = ntfs_fh_to_parent, .get_parent = ntfs3_get_parent, diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile index 4e173d56b11f..5648954f8588 100644 --- a/fs/overlayfs/Makefile +++ b/fs/overlayfs/Makefile @@ -6,4 +6,4 @@ obj-$(CONFIG_OVERLAY_FS) += overlay.o overlay-objs := super.o namei.o util.o inode.o file.o dir.o readdir.o \ - copy_up.o export.o params.o + copy_up.o export.o params.o xattrs.o diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index ada3fcc9c6d5..4382881b0709 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -252,7 +252,9 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, return PTR_ERR(old_file); /* Try to use clone_file_range to clone up within the same fs */ + ovl_start_write(dentry); cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0); + ovl_end_write(dentry); if (cloned == len) goto out_fput; /* Couldn't clone, so now we try to copy the data */ @@ -287,8 +289,12 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, * it may not recognize all kind of holes and sometimes * only skips partial of hole area. However, it will be * enough for most of the use cases. + * + * We do not hold upper sb_writers throughout the loop to avert + * lockdep warning with llseek of lower file in nested overlay: + * - upper sb_writers + * -- lower ovl_inode_lock (ovl_llseek) */ - if (skip_hole && data_pos < old_pos) { data_pos = vfs_llseek(old_file, old_pos, SEEK_DATA); if (data_pos > old_pos) { @@ -303,9 +309,11 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, } } + ovl_start_write(dentry); bytes = do_splice_direct(old_file, &old_pos, new_file, &new_pos, this_len, SPLICE_F_MOVE); + ovl_end_write(dentry); if (bytes <= 0) { error = bytes; break; @@ -426,29 +434,29 @@ out_err: return ERR_PTR(err); } -int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower, - struct dentry *upper) +struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin) { - const struct ovl_fh *fh = NULL; - int err; - /* * When lower layer doesn't support export operations store a 'null' fh, * so we can use the overlay.origin xattr to distinguish between a copy up and a pure upper inode.
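 * A NULL return here stands for that 'null' fh; ovl_set_origin_fh() then
 * sets a zero-length overlay.origin xattr.
 */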
*/ - if (ovl_can_decode_fh(lower->d_sb)) { - fh = ovl_encode_real_fh(ofs, lower, false); - if (IS_ERR(fh)) - return PTR_ERR(fh); - } + if (!ovl_can_decode_fh(origin->d_sb)) + return NULL; + + return ovl_encode_real_fh(ofs, origin, false); +} + +int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh, + struct dentry *upper) +{ + int err; /* * Do not fail when upper doesn't support xattrs. */ err = ovl_check_setxattr(ofs, upper, OVL_XATTR_ORIGIN, fh->buf, fh ? fh->fb.len : 0, 0); - kfree(fh); /* Ignore -EPERM from setting "user.*" on symlink/special */ return err == -EPERM ? 0 : err; @@ -476,7 +484,7 @@ static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper, * * Caller must hold i_mutex on indexdir. */ -static int ovl_create_index(struct dentry *dentry, struct dentry *origin, +static int ovl_create_index(struct dentry *dentry, const struct ovl_fh *fh, struct dentry *upper) { struct ovl_fs *ofs = OVL_FS(dentry->d_sb); @@ -502,7 +510,7 @@ static int ovl_create_index(struct dentry *dentry, struct dentry *origin, if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry)))) return -EIO; - err = ovl_get_index_name(ofs, origin, &name); + err = ovl_get_index_name_fh(fh, &name); if (err) return err; @@ -541,6 +549,7 @@ struct ovl_copy_up_ctx { struct dentry *destdir; struct qstr destname; struct dentry *workdir; + const struct ovl_fh *origin_fh; bool origin; bool indexed; bool metacopy; @@ -555,14 +564,16 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); struct inode *udir = d_inode(upperdir); + ovl_start_write(c->dentry); + /* Mark parent "impure" because it may now contain non-pure upper */ err = ovl_set_impure(c->parent, upperdir); if (err) - return err; + goto out; err = ovl_set_nlink_lower(c->dentry); if (err) - return err; + goto out; inode_lock_nested(udir, I_MUTEX_PARENT); upper = ovl_lookup_upper(ofs, c->dentry->d_name.name, upperdir, @@ -581,10 +592,12 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) } inode_unlock(udir); if (err) - return err; + goto out; err = ovl_set_nlink_upper(c->dentry); +out: + ovl_end_write(c->dentry); return err; } @@ -637,7 +650,7 @@ static int ovl_copy_up_metadata(struct ovl_copy_up_ctx *c, struct dentry *temp) * hard link. */ if (c->origin) { - err = ovl_set_origin(ofs, c->lowerpath.dentry, temp); + err = ovl_set_origin_fh(ofs, c->origin_fh, temp); if (err) return err; } @@ -719,21 +732,19 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) .link = c->link }; - /* workdir and destdir could be the same when copying up to indexdir */ - err = -EIO; - if (lock_rename(c->workdir, c->destdir) != NULL) - goto unlock; - err = ovl_prep_cu_creds(c->dentry, &cc); if (err) - goto unlock; + return err; + ovl_start_write(c->dentry); + inode_lock(wdir); temp = ovl_create_temp(ofs, c->workdir, &cattr); + inode_unlock(wdir); + ovl_end_write(c->dentry); ovl_revert_cu_creds(&cc); - err = PTR_ERR(temp); if (IS_ERR(temp)) - goto unlock; + return PTR_ERR(temp); /* * Copy up data first and then xattrs. Writing data after @@ -741,15 +752,28 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) */ path.dentry = temp; err = ovl_copy_up_data(c, &path); - if (err) + /* + * We cannot hold lock_rename() throughout this helper, because of + * lock ordering with sb_writers, which shouldn't be held when calling + * ovl_copy_up_data(), so lock workdir and destdir and make sure that + * temp wasn't moved before copy up completion or cleanup. + * If temp was moved, abort without the cleanup.
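ovl_set_origin() is split here into ovl_get_origin_fh(), which encodes the lower file handle once, and ovl_set_origin_fh(), which stores it; the same handle can then feed ovl_get_index_name_fh() for indexed copy up. A caller-side sketch of the new shape (example_set_origin is hypothetical; the real callers are ovl_do_copy_up() and ovl_fix_origin()):

        /* Hypothetical caller: encode the origin fh once, reuse it. */
        static int example_set_origin(struct ovl_fs *ofs, struct dentry *lower,
                                      struct dentry *upper)
        {
                struct ovl_fh *fh;
                int err;

                fh = ovl_get_origin_fh(ofs, lower);     /* NULL means a 'null' fh */
                if (IS_ERR(fh))
                        return PTR_ERR(fh);

                err = ovl_set_origin_fh(ofs, fh, upper);
                /* ... the same fh could feed ovl_get_index_name_fh() ... */

                kfree(fh);                              /* kfree(NULL) is a no-op */
                return err;
        }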
+ */ + ovl_start_write(c->dentry); + if (lock_rename(c->workdir, c->destdir) != NULL || + temp->d_parent != c->workdir) { + err = -EIO; + goto unlock; + } else if (err) { goto cleanup; + } err = ovl_copy_up_metadata(c, temp); if (err) goto cleanup; if (S_ISDIR(c->stat.mode) && c->indexed) { - err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp); + err = ovl_create_index(c->dentry, c->origin_fh, temp); if (err) goto cleanup; } @@ -779,6 +803,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) ovl_set_flag(OVL_WHITEOUTS, inode); unlock: unlock_rename(c->workdir, c->destdir); + ovl_end_write(c->dentry); return err; @@ -802,9 +827,10 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c) if (err) return err; + ovl_start_write(c->dentry); tmpfile = ovl_do_tmpfile(ofs, c->workdir, c->stat.mode); + ovl_end_write(c->dentry); ovl_revert_cu_creds(&cc); - if (IS_ERR(tmpfile)) return PTR_ERR(tmpfile); @@ -815,9 +841,11 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c) goto out_fput; } + ovl_start_write(c->dentry); + err = ovl_copy_up_metadata(c, temp); if (err) - goto out_fput; + goto out; inode_lock_nested(udir, I_MUTEX_PARENT); @@ -831,7 +859,7 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c) inode_unlock(udir); if (err) - goto out_fput; + goto out; if (c->metacopy_digest) ovl_set_flag(OVL_HAS_DIGEST, d_inode(c->dentry)); @@ -843,6 +871,8 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c) ovl_set_upperdata(d_inode(c->dentry)); ovl_inode_update(d_inode(c->dentry), dget(temp)); +out: + ovl_end_write(c->dentry); out_fput: fput(tmpfile); return err; @@ -861,6 +891,8 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) { int err; struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); + struct dentry *origin = c->lowerpath.dentry; + struct ovl_fh *fh = NULL; bool to_index = false; /* @@ -877,25 +909,35 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) to_index = true; } - if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index) + if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index) { + fh = ovl_get_origin_fh(ofs, origin); + if (IS_ERR(fh)) + return PTR_ERR(fh); + + /* origin_fh may be NULL */ + c->origin_fh = fh; c->origin = true; + } if (to_index) { c->destdir = ovl_indexdir(c->dentry->d_sb); - err = ovl_get_index_name(ofs, c->lowerpath.dentry, &c->destname); + err = ovl_get_index_name(ofs, origin, &c->destname); if (err) - return err; + goto out_free_fh; } else if (WARN_ON(!c->parent)) { /* Disconnected dentry must be copied up to index dir */ - return -EIO; + err = -EIO; + goto out_free_fh; } else { /* * Mark parent "impure" because it may now contain non-pure * upper */ + ovl_start_write(c->dentry); err = ovl_set_impure(c->parent, c->destdir); + ovl_end_write(c->dentry); if (err) - return err; + goto out_free_fh; } /* Should we copyup with O_TMPFILE or with workdir? 
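The ovl_copy_up_workdir() rework above hinges on one pattern: create temp under inode_lock(wdir) alone, copy the data with no rename locks held (so upper sb_writers can be taken and dropped per chunk), then re-acquire lock_rename() and verify that temp was not moved in the unlocked window. Reduced to its essentials (create_temp, copy_data and rename_into_place are hypothetical stand-ins; cleanup of temp on error is elided):

        static struct dentry *create_temp(struct dentry *workdir);
        static int copy_data(struct dentry *temp);
        static int rename_into_place(struct dentry *temp, struct dentry *destdir);

        static int copy_up_workdir_pattern(struct dentry *ovl_dentry,
                                           struct dentry *workdir,
                                           struct dentry *destdir)
        {
                struct dentry *temp;
                int err;

                inode_lock(d_inode(workdir));           /* no lock_rename() yet */
                temp = create_temp(workdir);
                inode_unlock(d_inode(workdir));
                if (IS_ERR(temp))
                        return PTR_ERR(temp);

                err = copy_data(temp);                  /* upper sb_writers cycled inside */

                ovl_start_write(ovl_dentry);            /* freeze protection for rename */
                /*
                 * lock_rename() returning non-NULL (one dir is the ancestor of
                 * the other) is unexpected here, and temp may have been moved
                 * while no locks were held.
                 */
                if (lock_rename(workdir, destdir) != NULL ||
                    temp->d_parent != workdir)
                        err = -EIO;                     /* abort, leave moved temp alone */
                else if (!err)
                        err = rename_into_place(temp, destdir);
                unlock_rename(workdir, destdir);
                ovl_end_write(ovl_dentry);
                dput(temp);
                return err;
        }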
*/ @@ -909,6 +951,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) if (c->indexed) ovl_set_flag(OVL_INDEX, d_inode(c->dentry)); + ovl_start_write(c->dentry); if (to_index) { /* Initialize nlink for copy up of disconnected dentry */ err = ovl_set_nlink_upper(c->dentry); @@ -923,10 +966,13 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) ovl_dentry_set_upper_alias(c->dentry); ovl_dentry_update_reval(c->dentry, ovl_dentry_upper(c->dentry)); } + ovl_end_write(c->dentry); out: if (to_index) kfree(c->destname.name); +out_free_fh: + kfree(fh); return err; } @@ -1011,15 +1057,16 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c) * Writing to upper file will clear security.capability xattr. We * don't want that to happen for normal copy-up operation. */ + ovl_start_write(c->dentry); if (capability) { err = ovl_do_setxattr(ofs, upperpath.dentry, XATTR_NAME_CAPS, capability, cap_size, 0); - if (err) - goto out_free; } - - - err = ovl_removexattr(ofs, upperpath.dentry, OVL_XATTR_METACOPY); + if (!err) { + err = ovl_removexattr(ofs, upperpath.dentry, + OVL_XATTR_METACOPY); + } + ovl_end_write(c->dentry); if (err) goto out_free; @@ -1170,17 +1217,10 @@ static bool ovl_open_need_copy_up(struct dentry *dentry, int flags) int ovl_maybe_copy_up(struct dentry *dentry, int flags) { - int err = 0; - - if (ovl_open_need_copy_up(dentry, flags)) { - err = ovl_want_write(dentry); - if (!err) { - err = ovl_copy_up_flags(dentry, flags); - ovl_drop_write(dentry); - } - } + if (!ovl_open_need_copy_up(dentry, flags)) + return 0; - return err; + return ovl_copy_up_flags(dentry, flags); } int ovl_copy_up_with_data(struct dentry *dentry) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 033fc0458a3d..aab3f5d93556 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -477,7 +477,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, goto out_unlock; err = -ESTALE; - if (d_is_negative(upper) || !IS_WHITEOUT(d_inode(upper))) + if (d_is_negative(upper) || !ovl_upper_is_whiteout(ofs, upper)) goto out_dput; newdentry = ovl_create_temp(ofs, workdir, cattr); @@ -559,10 +559,6 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, struct cred *override_cred; struct dentry *parent = dentry->d_parent; - err = ovl_copy_up(parent); - if (err) - return err; - old_cred = ovl_override_creds(dentry->d_sb); /* @@ -626,6 +622,10 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev, .link = link, }; + err = ovl_copy_up(dentry->d_parent); + if (err) + return err; + err = ovl_want_write(dentry); if (err) goto out; @@ -700,28 +700,24 @@ static int ovl_link(struct dentry *old, struct inode *newdir, int err; struct inode *inode; - err = ovl_want_write(old); + err = ovl_copy_up(old); if (err) goto out; - err = ovl_copy_up(old); + err = ovl_copy_up(new->d_parent); if (err) - goto out_drop_write; + goto out; - err = ovl_copy_up(new->d_parent); + err = ovl_nlink_start(old); if (err) - goto out_drop_write; + goto out; if (ovl_is_metacopy_dentry(old)) { err = ovl_set_link_redirect(old); if (err) - goto out_drop_write; + goto out_nlink_end; } - err = ovl_nlink_start(old); - if (err) - goto out_drop_write; - inode = d_inode(old); ihold(inode); @@ -731,9 +727,8 @@ static int ovl_link(struct dentry *old, struct inode *newdir, if (err) iput(inode); +out_nlink_end: ovl_nlink_end(old); -out_drop_write: - ovl_drop_write(old); out: return err; } @@ -891,17 +886,13 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) goto out; } 
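The dir.c hunks above and below all follow the same reordering: ovl_copy_up() now manages upper write access internally (see the util.c hunks near the end of this series), so callers take ovl_want_write() only around the upper-layer modification itself instead of holding sb_writers across the whole operation, copy up included. Schematically (ovl_modify_pattern is an illustrative composite, not any one function):

        /* Sketch of the reordered convention in fs/overlayfs/dir.c. */
        static int ovl_modify_pattern(struct dentry *dentry)
        {
                int err;

                /* Copy up first: it acquires upper write access on its own now. */
                err = ovl_copy_up(dentry->d_parent);
                if (err)
                        return err;

                /* Only then take sb_writers, for the actual upper fs change. */
                err = ovl_want_write(dentry);
                if (err)
                        return err;

                /* ... create/link/unlink/rename on the upper layer ... */

                ovl_drop_write(dentry);
                return err;
        }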
- err = ovl_want_write(dentry); - if (err) - goto out; - err = ovl_copy_up(dentry->d_parent); if (err) - goto out_drop_write; + goto out; err = ovl_nlink_start(dentry); if (err) - goto out_drop_write; + goto out; old_cred = ovl_override_creds(dentry->d_sb); if (!lower_positive) @@ -926,8 +917,6 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) if (ovl_dentry_upper(dentry)) ovl_copyattr(d_inode(dentry)); -out_drop_write: - ovl_drop_write(dentry); out: ovl_cache_free(&list); return err; @@ -1131,29 +1120,32 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir, } } - err = ovl_want_write(old); - if (err) - goto out; - err = ovl_copy_up(old); if (err) - goto out_drop_write; + goto out; err = ovl_copy_up(new->d_parent); if (err) - goto out_drop_write; + goto out; if (!overwrite) { err = ovl_copy_up(new); if (err) - goto out_drop_write; + goto out; } else if (d_inode(new)) { err = ovl_nlink_start(new); if (err) - goto out_drop_write; + goto out; update_nlink = true; } + if (!update_nlink) { + /* ovl_nlink_start() took ovl_want_write() */ + err = ovl_want_write(old); + if (err) + goto out; + } + old_cred = ovl_override_creds(old->d_sb); if (!list_empty(&list)) { @@ -1219,7 +1211,7 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir, } } else { if (!d_is_negative(newdentry)) { - if (!new_opaque || !ovl_is_whiteout(newdentry)) + if (!new_opaque || !ovl_upper_is_whiteout(ofs, newdentry)) goto out_dput; } else { if (flags & RENAME_EXCHANGE) @@ -1286,8 +1278,8 @@ out_revert_creds: revert_creds(old_cred); if (update_nlink) ovl_nlink_end(new); -out_drop_write: - ovl_drop_write(old); + else + ovl_drop_write(old); out: dput(opaquedir); ovl_cache_free(&list); diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 26b782c53910..7e16bbcad95e 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -23,12 +23,7 @@ static int ovl_encode_maybe_copy_up(struct dentry *dentry) if (ovl_dentry_upper(dentry)) return 0; - err = ovl_want_write(dentry); - if (!err) { - err = ovl_copy_up(dentry); - ovl_drop_write(dentry); - } - + err = ovl_copy_up(dentry); if (err) { pr_warn_ratelimited("failed to copy up on encode (%pd2, err=%i)\n", dentry, err); diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index ec3671ca140c..131621daeb13 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -15,10 +15,15 @@ #include <linux/fs.h> #include "overlayfs.h" +#include "../internal.h" /* for sb_init_dio_done_wq */ + struct ovl_aio_req { struct kiocb iocb; refcount_t ref; struct kiocb *orig_iocb; + /* used for aio completion */ + struct work_struct work; + long res; }; static struct kmem_cache *ovl_aio_request_cachep; @@ -235,6 +240,12 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) return ret; } +static void ovl_file_modified(struct file *file) +{ + /* Update size/mtime */ + ovl_copyattr(file_inode(file)); +} + static void ovl_file_accessed(struct file *file) { struct inode *inode, *upperinode; @@ -263,20 +274,12 @@ static void ovl_file_accessed(struct file *file) touch_atime(&file->f_path); } -static rwf_t ovl_iocb_to_rwf(int ifl) +#define OVL_IOCB_MASK \ + (IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND) + +static rwf_t iocb_to_rw_flags(int flags) { - rwf_t flags = 0; - - if (ifl & IOCB_NOWAIT) - flags |= RWF_NOWAIT; - if (ifl & IOCB_HIPRI) - flags |= RWF_HIPRI; - if (ifl & IOCB_DSYNC) - flags |= RWF_DSYNC; - if (ifl & IOCB_SYNC) - flags |= RWF_SYNC; - - return flags; + return (__force rwf_t)(flags & 
OVL_IOCB_MASK); } static inline void ovl_aio_put(struct ovl_aio_req *aio_req) @@ -293,10 +296,8 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) struct kiocb *orig_iocb = aio_req->orig_iocb; if (iocb->ki_flags & IOCB_WRITE) { - struct inode *inode = file_inode(orig_iocb->ki_filp); - kiocb_end_write(iocb); - ovl_copyattr(inode); + ovl_file_modified(orig_iocb->ki_filp); } orig_iocb->ki_pos = iocb->ki_pos; @@ -313,6 +314,37 @@ static void ovl_aio_rw_complete(struct kiocb *iocb, long res) orig_iocb->ki_complete(orig_iocb, res); } +static void ovl_aio_complete_work(struct work_struct *work) +{ + struct ovl_aio_req *aio_req = container_of(work, + struct ovl_aio_req, work); + + ovl_aio_rw_complete(&aio_req->iocb, aio_req->res); +} + +static void ovl_aio_queue_completion(struct kiocb *iocb, long res) +{ + struct ovl_aio_req *aio_req = container_of(iocb, + struct ovl_aio_req, iocb); + struct kiocb *orig_iocb = aio_req->orig_iocb; + + /* + * Punt to a work queue to serialize updates of mtime/size. + */ + aio_req->res = res; + INIT_WORK(&aio_req->work, ovl_aio_complete_work); + queue_work(file_inode(orig_iocb->ki_filp)->i_sb->s_dio_done_wq, + &aio_req->work); +} + +static int ovl_init_aio_done_wq(struct super_block *sb) +{ + if (sb->s_dio_done_wq) + return 0; + + return sb_init_dio_done_wq(sb); +} + static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; @@ -334,8 +366,9 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) old_cred = ovl_override_creds(file_inode(file)->i_sb); if (is_sync_kiocb(iocb)) { - ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, - ovl_iocb_to_rwf(iocb->ki_flags)); + rwf_t rwf = iocb_to_rw_flags(iocb->ki_flags); + + ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, rwf); } else { struct ovl_aio_req *aio_req; @@ -401,15 +434,20 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) old_cred = ovl_override_creds(file_inode(file)->i_sb); if (is_sync_kiocb(iocb)) { + rwf_t rwf = iocb_to_rw_flags(ifl); + file_start_write(real.file); - ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, - ovl_iocb_to_rwf(ifl)); + ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, rwf); file_end_write(real.file); /* Update size */ - ovl_copyattr(inode); + ovl_file_modified(file); } else { struct ovl_aio_req *aio_req; + ret = ovl_init_aio_done_wq(inode->i_sb); + if (ret) + goto out; + ret = -ENOMEM; aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL); if (!aio_req) @@ -418,7 +456,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) aio_req->orig_iocb = iocb; kiocb_clone(&aio_req->iocb, iocb, get_file(real.file)); aio_req->iocb.ki_flags = ifl; - aio_req->iocb.ki_complete = ovl_aio_rw_complete; + aio_req->iocb.ki_complete = ovl_aio_queue_completion; refcount_set(&aio_req->ref, 2); kiocb_start_write(&aio_req->iocb); ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter); @@ -492,7 +530,7 @@ static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, file_end_write(real.file); /* Update size */ - ovl_copyattr(inode); + ovl_file_modified(out); revert_creds(old_cred); fdput(real); @@ -573,7 +611,7 @@ static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len revert_creds(old_cred); /* Update size */ - ovl_copyattr(inode); + ovl_file_modified(file); fdput(real); @@ -657,7 +695,7 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, revert_creds(old_cred); /* Update size */ - 
ovl_copyattr(inode_out); + ovl_file_modified(file_out); fdput(real_in); fdput(real_out); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index b6e98a7d36ce..345b8f161ca4 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -32,10 +32,6 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (err) return err; - err = ovl_want_write(dentry); - if (err) - goto out; - if (attr->ia_valid & ATTR_SIZE) { /* Truncate should trigger data copy up as well */ full_copy_up = true; @@ -54,7 +50,7 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, winode = d_inode(upperdentry); err = get_write_access(winode); if (err) - goto out_drop_write; + goto out; } if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) @@ -78,6 +74,10 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, */ attr->ia_valid &= ~ATTR_OPEN; + err = ovl_want_write(dentry); + if (err) + goto out_put_write; + inode_lock(upperdentry->d_inode); old_cred = ovl_override_creds(dentry->d_sb); err = ovl_do_notify_change(ofs, upperdentry, attr); @@ -85,12 +85,12 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (!err) ovl_copyattr(dentry->d_inode); inode_unlock(upperdentry->d_inode); + ovl_drop_write(dentry); +out_put_write: if (winode) put_write_access(winode); } -out_drop_write: - ovl_drop_write(dentry); out: return err; } @@ -339,130 +339,6 @@ static const char *ovl_get_link(struct dentry *dentry, return p; } -bool ovl_is_private_xattr(struct super_block *sb, const char *name) -{ - struct ovl_fs *ofs = OVL_FS(sb); - - if (ofs->config.userxattr) - return strncmp(name, OVL_XATTR_USER_PREFIX, - sizeof(OVL_XATTR_USER_PREFIX) - 1) == 0; - else - return strncmp(name, OVL_XATTR_TRUSTED_PREFIX, - sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) == 0; -} - -int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - int err; - struct ovl_fs *ofs = OVL_FS(dentry->d_sb); - struct dentry *upperdentry = ovl_i_dentry_upper(inode); - struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); - struct path realpath; - const struct cred *old_cred; - - err = ovl_want_write(dentry); - if (err) - goto out; - - if (!value && !upperdentry) { - ovl_path_lower(dentry, &realpath); - old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_getxattr(mnt_idmap(realpath.mnt), realdentry, name, NULL, 0); - revert_creds(old_cred); - if (err < 0) - goto out_drop_write; - } - - if (!upperdentry) { - err = ovl_copy_up(dentry); - if (err) - goto out_drop_write; - - realdentry = ovl_dentry_upper(dentry); - } - - old_cred = ovl_override_creds(dentry->d_sb); - if (value) { - err = ovl_do_setxattr(ofs, realdentry, name, value, size, - flags); - } else { - WARN_ON(flags != XATTR_REPLACE); - err = ovl_do_removexattr(ofs, realdentry, name); - } - revert_creds(old_cred); - - /* copy c/mtime */ - ovl_copyattr(inode); - -out_drop_write: - ovl_drop_write(dentry); -out: - return err; -} - -int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, - void *value, size_t size) -{ - ssize_t res; - const struct cred *old_cred; - struct path realpath; - - ovl_i_path_real(inode, &realpath); - old_cred = ovl_override_creds(dentry->d_sb); - res = vfs_getxattr(mnt_idmap(realpath.mnt), realpath.dentry, name, value, size); - revert_creds(old_cred); - return res; -} - -static bool ovl_can_list(struct super_block *sb, const char *s) -{ - /* Never list private (.overlay) */ - if (ovl_is_private_xattr(sb, s)) - return false; - - /* 
List all non-trusted xattrs */ - if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) - return true; - - /* list other trusted for superuser only */ - return ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN); -} - -ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) -{ - struct dentry *realdentry = ovl_dentry_real(dentry); - ssize_t res; - size_t len; - char *s; - const struct cred *old_cred; - - old_cred = ovl_override_creds(dentry->d_sb); - res = vfs_listxattr(realdentry, list, size); - revert_creds(old_cred); - if (res <= 0 || size == 0) - return res; - - /* filter out private xattrs */ - for (s = list, len = res; len;) { - size_t slen = strnlen(s, len) + 1; - - /* underlying fs providing us with an broken xattr list? */ - if (WARN_ON(slen > len)) - return -EIO; - - len -= slen; - if (!ovl_can_list(dentry->d_sb, s)) { - res -= slen; - memmove(s, s + slen, len); - } else { - s += slen; - } - } - - return res; -} - #ifdef CONFIG_FS_POSIX_ACL /* * Apply the idmapping of the layer to POSIX ACLs. The caller must pass a clone @@ -611,10 +487,6 @@ static int ovl_set_or_remove_acl(struct dentry *dentry, struct inode *inode, struct dentry *upperdentry = ovl_dentry_upper(dentry); struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); - err = ovl_want_write(dentry); - if (err) - return err; - /* * If ACL is to be removed from a lower file, check if it exists in * the first place before copying it up. @@ -630,7 +502,7 @@ static int ovl_set_or_remove_acl(struct dentry *dentry, struct inode *inode, revert_creds(old_cred); if (IS_ERR(real_acl)) { err = PTR_ERR(real_acl); - goto out_drop_write; + goto out; } posix_acl_release(real_acl); } @@ -638,23 +510,26 @@ static int ovl_set_or_remove_acl(struct dentry *dentry, struct inode *inode, if (!upperdentry) { err = ovl_copy_up(dentry); if (err) - goto out_drop_write; + goto out; realdentry = ovl_dentry_upper(dentry); } + err = ovl_want_write(dentry); + if (err) + goto out; + old_cred = ovl_override_creds(dentry->d_sb); if (acl) err = ovl_do_set_acl(ofs, realdentry, acl_name, acl); else err = ovl_do_remove_acl(ofs, realdentry, acl_name); revert_creds(old_cred); + ovl_drop_write(dentry); /* copy c/mtime */ ovl_copyattr(inode); - -out_drop_write: - ovl_drop_write(dentry); +out: return err; } @@ -778,14 +653,14 @@ int ovl_fileattr_set(struct mnt_idmap *idmap, unsigned int flags; int err; - err = ovl_want_write(dentry); - if (err) - goto out; - err = ovl_copy_up(dentry); if (!err) { ovl_path_real(dentry, &upperpath); + err = ovl_want_write(dentry); + if (err) + goto out; + old_cred = ovl_override_creds(inode->i_sb); /* * Store immutable/append-only flags in xattr and clear them @@ -798,6 +673,7 @@ int ovl_fileattr_set(struct mnt_idmap *idmap, if (!err) err = ovl_real_fileattr_set(&upperpath, fa); revert_creds(old_cred); + ovl_drop_write(dentry); /* * Merge real inode flags with inode flags read from @@ -812,7 +688,6 @@ int ovl_fileattr_set(struct mnt_idmap *idmap, /* Update ctime */ ovl_copyattr(inode); } - ovl_drop_write(dentry); out: return err; } diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 80391c687c2a..03bc8d5dfa31 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -251,7 +251,10 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, err = -EREMOTE; goto out_err; } - if (ovl_is_whiteout(this)) { + + path.dentry = this; + path.mnt = d->mnt; + if (ovl_path_is_whiteout(OVL_FS(d->sb), &path)) { d->stop = d->opaque = true; goto put_and_out; } @@ -264,8 +267,6 
@@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, goto put_and_out; } - path.dentry = this; - path.mnt = d->mnt; if (!d_can_lookup(this)) { if (d->is_dir || !last_element) { d->stop = true; @@ -438,7 +439,7 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, else if (IS_ERR(origin)) return PTR_ERR(origin); - if (upperdentry && !ovl_is_whiteout(upperdentry) && + if (upperdentry && !ovl_upper_is_whiteout(ofs, upperdentry) && inode_wrong_type(d_inode(upperdentry), d_inode(origin)->i_mode)) goto invalid; @@ -507,6 +508,19 @@ static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry, return err; } +int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry, + enum ovl_xattr ox, const struct ovl_fh *fh, + bool is_upper, bool set) +{ + int err; + + err = ovl_verify_fh(ofs, dentry, ox, fh); + if (set && err == -ENODATA) + err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len); + + return err; +} + /* * Verify that @real dentry matches the file handle stored in xattr @name. * @@ -515,9 +529,9 @@ static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry, * * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error. */ -int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry, - enum ovl_xattr ox, struct dentry *real, bool is_upper, - bool set) +int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry, + enum ovl_xattr ox, struct dentry *real, + bool is_upper, bool set) { struct inode *inode; struct ovl_fh *fh; @@ -530,9 +544,7 @@ int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry, goto fail; } - err = ovl_verify_fh(ofs, dentry, ox, fh); - if (set && err == -ENODATA) - err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len); + err = ovl_verify_set_fh(ofs, dentry, ox, fh, is_upper, set); if (err) goto fail; @@ -548,6 +560,7 @@ fail: goto out; } + /* Get upper dentry from index */ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index, bool connected) @@ -684,7 +697,7 @@ orphan: goto out; } -static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name) +int ovl_get_index_name_fh(const struct ovl_fh *fh, struct qstr *name) { char *n, *s; @@ -873,20 +886,27 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path) static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry, struct dentry *lower, struct dentry *upper) { + const struct ovl_fh *fh; int err; if (ovl_check_origin_xattr(ofs, upper)) return 0; + fh = ovl_get_origin_fh(ofs, lower); + if (IS_ERR(fh)) + return PTR_ERR(fh); + err = ovl_want_write(dentry); if (err) - return err; + goto out; - err = ovl_set_origin(ofs, lower, upper); + err = ovl_set_origin_fh(ofs, fh, upper); if (!err) err = ovl_set_impure(dentry->d_parent, upper->d_parent); ovl_drop_write(dentry); +out: + kfree(fh); return err; } @@ -1383,7 +1403,11 @@ bool ovl_lower_positive(struct dentry *dentry) break; } } else { - positive = !ovl_is_whiteout(this); + struct path path = { + .dentry = this, + .mnt = parentpath->layer->mnt, + }; + positive = !ovl_path_is_whiteout(OVL_FS(dentry->d_sb), &path); done = true; dput(this); } diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 9817b2dcb132..ca88b2636a57 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -28,7 +28,16 @@ enum ovl_path_type { #define OVL_XATTR_NAMESPACE "overlay." 
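The OVL_XATTR_*_PREFIX macros defined just below compose by C string-literal concatenation, and the escape prefixes append a second "overlay." component, which is presumably how an overlay.* xattr visible inside the mount can be stored on a layer without colliding with overlayfs' own metadata. A small userspace check of the resulting strings (plain C, mirroring the concatenation only):

        #include <stdio.h>

        #define XATTR_TRUSTED_PREFIX            "trusted."
        #define OVL_XATTR_NAMESPACE             "overlay."
        #define OVL_XATTR_TRUSTED_PREFIX        XATTR_TRUSTED_PREFIX OVL_XATTR_NAMESPACE
        #define OVL_XATTR_ESCAPE_TRUSTED_PREFIX OVL_XATTR_TRUSTED_PREFIX OVL_XATTR_NAMESPACE

        int main(void)
        {
                puts(OVL_XATTR_TRUSTED_PREFIX);         /* trusted.overlay. */
                puts(OVL_XATTR_ESCAPE_TRUSTED_PREFIX);  /* trusted.overlay.overlay. */
                return 0;
        }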
#define OVL_XATTR_TRUSTED_PREFIX XATTR_TRUSTED_PREFIX OVL_XATTR_NAMESPACE +#define OVL_XATTR_TRUSTED_PREFIX_LEN (sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) #define OVL_XATTR_USER_PREFIX XATTR_USER_PREFIX OVL_XATTR_NAMESPACE +#define OVL_XATTR_USER_PREFIX_LEN (sizeof(OVL_XATTR_USER_PREFIX) - 1) + +#define OVL_XATTR_ESCAPE_PREFIX OVL_XATTR_NAMESPACE +#define OVL_XATTR_ESCAPE_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_PREFIX) - 1) +#define OVL_XATTR_ESCAPE_TRUSTED_PREFIX OVL_XATTR_TRUSTED_PREFIX OVL_XATTR_ESCAPE_PREFIX +#define OVL_XATTR_ESCAPE_TRUSTED_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_TRUSTED_PREFIX) - 1) +#define OVL_XATTR_ESCAPE_USER_PREFIX OVL_XATTR_USER_PREFIX OVL_XATTR_ESCAPE_PREFIX +#define OVL_XATTR_ESCAPE_USER_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_USER_PREFIX) - 1) enum ovl_xattr { OVL_XATTR_OPAQUE, @@ -40,6 +49,8 @@ enum ovl_xattr { OVL_XATTR_UUID, OVL_XATTR_METACOPY, OVL_XATTR_PROTATTR, + OVL_XATTR_XWHITEOUT, + OVL_XATTR_XWHITEOUTS, }; enum ovl_inode_flag { @@ -398,6 +409,10 @@ static inline bool ovl_open_flags_need_copy_up(int flags) } /* util.c */ +int ovl_get_write_access(struct dentry *dentry); +void ovl_put_write_access(struct dentry *dentry); +void ovl_start_write(struct dentry *dentry); +void ovl_end_write(struct dentry *dentry); int ovl_want_write(struct dentry *dentry); void ovl_drop_write(struct dentry *dentry); struct dentry *ovl_workdir(struct dentry *dentry); @@ -460,6 +475,7 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry); void ovl_dir_modified(struct dentry *dentry, bool impurity); u64 ovl_inode_version_get(struct inode *inode); bool ovl_is_whiteout(struct dentry *dentry); +bool ovl_path_is_whiteout(struct ovl_fs *ofs, const struct path *path); struct file *ovl_path_open(const struct path *path, int flags); int ovl_copy_up_start(struct dentry *dentry, int flags); void ovl_copy_up_end(struct dentry *dentry); @@ -467,9 +483,21 @@ bool ovl_already_copied_up(struct dentry *dentry, int flags); bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path, enum ovl_xattr ox); bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path); +bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path); +bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const struct path *path); bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs, const struct path *upperpath); +static inline bool ovl_upper_is_whiteout(struct ovl_fs *ofs, + struct dentry *upperdentry) +{ + struct path upperpath = { + .dentry = upperdentry, + .mnt = ovl_upper_mnt(ofs), + }; + return ovl_path_is_whiteout(ofs, &upperpath); +} + static inline bool ovl_check_origin_xattr(struct ovl_fs *ofs, struct dentry *upperdentry) { @@ -624,11 +652,15 @@ struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh, int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, struct dentry *upperdentry, struct ovl_path **stackp); int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry, - enum ovl_xattr ox, struct dentry *real, bool is_upper, - bool set); + enum ovl_xattr ox, const struct ovl_fh *fh, + bool is_upper, bool set); +int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry, + enum ovl_xattr ox, struct dentry *real, + bool is_upper, bool set); struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index, bool connected); int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index); +int ovl_get_index_name_fh(const struct ovl_fh *fh, struct qstr *name); int 
ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin, struct qstr *name); struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh); @@ -640,17 +672,24 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); bool ovl_lower_positive(struct dentry *dentry); +static inline int ovl_verify_origin_fh(struct ovl_fs *ofs, struct dentry *upper, + const struct ovl_fh *fh, bool set) +{ + return ovl_verify_set_fh(ofs, upper, OVL_XATTR_ORIGIN, fh, false, set); +} + static inline int ovl_verify_origin(struct ovl_fs *ofs, struct dentry *upper, struct dentry *origin, bool set) { - return ovl_verify_set_fh(ofs, upper, OVL_XATTR_ORIGIN, origin, - false, set); + return ovl_verify_origin_xattr(ofs, upper, OVL_XATTR_ORIGIN, origin, + false, set); } static inline int ovl_verify_upper(struct ovl_fs *ofs, struct dentry *index, struct dentry *upper, bool set) { - return ovl_verify_set_fh(ofs, index, OVL_XATTR_UPPER, upper, true, set); + return ovl_verify_origin_xattr(ofs, index, OVL_XATTR_UPPER, upper, + true, set); } /* readdir.c */ @@ -684,17 +723,8 @@ int ovl_set_nlink_lower(struct dentry *dentry); unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry, struct dentry *upperdentry, unsigned int fallback); -int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, - struct iattr *attr); -int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, - struct kstat *stat, u32 request_mask, unsigned int flags); int ovl_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); -int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, - const void *value, size_t size, int flags); -int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, - void *value, size_t size); -ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *do_ovl_get_acl(struct mnt_idmap *idmap, @@ -815,8 +845,9 @@ int ovl_copy_xattr(struct super_block *sb, const struct path *path, struct dentr int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upper, struct kstat *stat); struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real, bool is_upper); -int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower, - struct dentry *upper); +struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin); +int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh, + struct dentry *upper); /* export.c */ extern const struct export_operations ovl_export_operations; @@ -830,3 +861,12 @@ static inline bool ovl_force_readonly(struct ovl_fs *ofs) { return (!ovl_upper_mnt(ofs) || !ofs->workdir); } + +/* xattr.c */ + +const struct xattr_handler * const *ovl_xattr_handlers(struct ovl_fs *ofs); +int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *attr); +int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, + struct kstat *stat, u32 request_mask, unsigned int flags); +ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c index f6ff23fd101c..ddab9ea267d1 100644 --- a/fs/overlayfs/params.c +++ b/fs/overlayfs/params.c @@ -43,8 +43,10 @@ module_param_named(metacopy, ovl_metacopy_def, bool, 0644); MODULE_PARM_DESC(metacopy, "Default to on or off for the metadata only copy up feature"); -enum { +enum ovl_opt { Opt_lowerdir, + Opt_lowerdir_add, + Opt_datadir_add, Opt_upperdir, Opt_workdir, Opt_default_permissions, @@ -140,8 +142,11 
@@ static int ovl_verity_mode_def(void) #define fsparam_string_empty(NAME, OPT) \ __fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL) + const struct fs_parameter_spec ovl_parameter_spec[] = { fsparam_string_empty("lowerdir", Opt_lowerdir), + fsparam_string("lowerdir+", Opt_lowerdir_add), + fsparam_string("datadir+", Opt_datadir_add), fsparam_string("upperdir", Opt_upperdir), fsparam_string("workdir", Opt_workdir), fsparam_flag("default_permissions", Opt_default_permissions), @@ -238,19 +243,8 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path) pr_err("failed to resolve '%s': %i\n", name, err); goto out; } - err = -EINVAL; - if (ovl_dentry_weird(path->dentry)) { - pr_err("filesystem on '%s' not supported\n", name); - goto out_put; - } - if (!d_is_dir(path->dentry)) { - pr_err("'%s' not a directory\n", name); - goto out_put; - } return 0; -out_put: - path_put_init(path); out: return err; } @@ -268,7 +262,7 @@ static void ovl_unescape(char *s) } } -static int ovl_mount_dir(const char *name, struct path *path, bool upper) +static int ovl_mount_dir(const char *name, struct path *path) { int err = -ENOMEM; char *tmp = kstrdup(name, GFP_KERNEL); @@ -276,68 +270,147 @@ static int ovl_mount_dir(const char *name, struct path *path, bool upper) if (tmp) { ovl_unescape(tmp); err = ovl_mount_dir_noesc(tmp, path); - - if (!err && upper && path->dentry->d_flags & DCACHE_OP_REAL) { - pr_err("filesystem on '%s' not supported as upperdir\n", - tmp); - path_put_init(path); - err = -EINVAL; - } kfree(tmp); } return err; } -static int ovl_parse_param_upperdir(const char *name, struct fs_context *fc, - bool workdir) +static int ovl_mount_dir_check(struct fs_context *fc, const struct path *path, + enum ovl_opt layer, const char *name, bool upper) { - int err; - struct ovl_fs *ofs = fc->s_fs_info; - struct ovl_config *config = &ofs->config; struct ovl_fs_context *ctx = fc->fs_private; - struct path path; - char *dup; - err = ovl_mount_dir(name, &path, true); - if (err) - return err; + if (ovl_dentry_weird(path->dentry)) + return invalfc(fc, "filesystem on %s not supported", name); + + if (!d_is_dir(path->dentry)) + return invalfc(fc, "%s is not a directory", name); + /* * Check whether upper path is read-only here to report failures * early. Don't forget to recheck when the superblock is created * as the mount attributes could change. 
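The lowerdir+ and datadir+ parameters registered in the spec above are incremental: each call supplies exactly one layer, so path names no longer need the colon escaping of the single lowerdir string (which is why ovl_parse_layer() below uses ovl_mount_dir_noesc() for them). A hedged userspace sketch using the new mount API (assumes a libc exposing the fsopen(2) family, e.g. glibc 2.36+; all paths are placeholders; error cleanup elided):

        #include <sys/mount.h>
        #include <fcntl.h>
        #include <unistd.h>

        int mount_overlay(void)
        {
                int fsfd, mntfd;

                fsfd = fsopen("overlay", FSOPEN_CLOEXEC);
                if (fsfd < 0)
                        return -1;

                /* One layer per call, no ':' or '::' separators to escape. */
                fsconfig(fsfd, FSCONFIG_SET_STRING, "lowerdir+", "/layers/lower1", 0);
                fsconfig(fsfd, FSCONFIG_SET_STRING, "lowerdir+", "/layers/lower2", 0);
                fsconfig(fsfd, FSCONFIG_SET_STRING, "datadir+",  "/layers/data1", 0);
                fsconfig(fsfd, FSCONFIG_SET_STRING, "upperdir",  "/layers/upper", 0);
                fsconfig(fsfd, FSCONFIG_SET_STRING, "workdir",   "/layers/work", 0);
                if (fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0)
                        return -1;

                mntfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
                if (mntfd < 0)
                        return -1;
                return move_mount(mntfd, "", AT_FDCWD, "/mnt/merged",
                                  MOVE_MOUNT_F_EMPTY_PATH);
        }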
*/ - if (__mnt_is_readonly(path.mnt)) { - path_put(&path); - return -EINVAL; + if (upper) { + if (path->dentry->d_flags & DCACHE_OP_REAL) + return invalfc(fc, "filesystem on %s not supported as upperdir", name); + if (__mnt_is_readonly(path->mnt)) + return invalfc(fc, "filesystem on %s is read-only", name); + } else { + if (ctx->lowerdir_all && layer != Opt_lowerdir) + return invalfc(fc, "lowerdir+ and datadir+ cannot follow lowerdir"); + if (ctx->nr_data && layer == Opt_lowerdir_add) + return invalfc(fc, "regular lower layers cannot follow data layers"); + if (ctx->nr == OVL_MAX_STACK) + return invalfc(fc, "too many lower directories, limit is %d", + OVL_MAX_STACK); } + return 0; +} - dup = kstrdup(name, GFP_KERNEL); - if (!dup) { - path_put(&path); +static int ovl_ctx_realloc_lower(struct fs_context *fc) +{ + struct ovl_fs_context *ctx = fc->fs_private; + struct ovl_fs_context_layer *l; + size_t nr; + + if (ctx->nr < ctx->capacity) + return 0; + + nr = min_t(size_t, max(4096 / sizeof(*l), ctx->capacity * 2), + OVL_MAX_STACK); + l = krealloc_array(ctx->lower, nr, sizeof(*l), GFP_KERNEL_ACCOUNT); + if (!l) return -ENOMEM; + + ctx->lower = l; + ctx->capacity = nr; + return 0; +} + +static void ovl_add_layer(struct fs_context *fc, enum ovl_opt layer, + struct path *path, char **pname) +{ + struct ovl_fs *ofs = fc->s_fs_info; + struct ovl_config *config = &ofs->config; + struct ovl_fs_context *ctx = fc->fs_private; + struct ovl_fs_context_layer *l; + + switch (layer) { + case Opt_workdir: + swap(config->workdir, *pname); + swap(ctx->work, *path); + break; + case Opt_upperdir: + swap(config->upperdir, *pname); + swap(ctx->upper, *path); + break; + case Opt_datadir_add: + ctx->nr_data++; + fallthrough; + case Opt_lowerdir_add: + WARN_ON(ctx->nr >= ctx->capacity); + l = &ctx->lower[ctx->nr++]; + memset(l, 0, sizeof(*l)); + swap(l->name, *pname); + swap(l->path, *path); + break; + default: + WARN_ON(1); } +} - if (workdir) { - kfree(config->workdir); - config->workdir = dup; - path_put(&ctx->work); - ctx->work = path; - } else { - kfree(config->upperdir); - config->upperdir = dup; - path_put(&ctx->upper); - ctx->upper = path; +static int ovl_parse_layer(struct fs_context *fc, struct fs_parameter *param, + enum ovl_opt layer) +{ + char *name = kstrdup(param->string, GFP_KERNEL); + bool upper = (layer == Opt_upperdir || layer == Opt_workdir); + struct path path; + int err; + + if (!name) + return -ENOMEM; + + if (upper) + err = ovl_mount_dir(name, &path); + else + err = ovl_mount_dir_noesc(name, &path); + if (err) + goto out_free; + + err = ovl_mount_dir_check(fc, &path, layer, name, upper); + if (err) + goto out_put; + + if (!upper) { + err = ovl_ctx_realloc_lower(fc); + if (err) + goto out_put; } - return 0; + + /* Store the user provided path string in ctx to show in mountinfo */ + ovl_add_layer(fc, layer, &path, &name); + +out_put: + path_put(&path); +out_free: + kfree(name); + return err; } -static void ovl_parse_param_drop_lowerdir(struct ovl_fs_context *ctx) +static void ovl_reset_lowerdirs(struct ovl_fs_context *ctx) { - for (size_t nr = 0; nr < ctx->nr; nr++) { - path_put(&ctx->lower[nr].path); - kfree(ctx->lower[nr].name); - ctx->lower[nr].name = NULL; + struct ovl_fs_context_layer *l = ctx->lower; + + // Reset old user provided lowerdir string + kfree(ctx->lowerdir_all); + ctx->lowerdir_all = NULL; + + for (size_t nr = 0; nr < ctx->nr; nr++, l++) { + path_put(&l->path); + kfree(l->name); + l->name = NULL; } ctx->nr = 0; ctx->nr_data = 0; @@ -346,7 +419,7 @@ static void 
ovl_parse_param_drop_lowerdir(struct ovl_fs_context *ctx) /* * Parse lowerdir= mount option: * - * (1) lowerdir=/lower1:/lower2:/lower3::/data1::/data2 + * e.g.: lowerdir=/lower1:/lower2:/lower3::/data1::/data2 * Set "/lower1", "/lower2", and "/lower3" as lower layers and * "/data1" and "/data2" as data lower layers. Any existing lower * layers are replaced. @@ -356,9 +429,9 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) int err; struct ovl_fs_context *ctx = fc->fs_private; struct ovl_fs_context_layer *l; - char *dup = NULL, *dup_iter; + char *dup = NULL, *iter; ssize_t nr_lower = 0, nr = 0, nr_data = 0; - bool append = false, data_layer = false; + bool data_layer = false; /* * Ensure we're backwards compatible with mount(2) @@ -366,16 +439,21 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) */ /* drop all existing lower layers */ - if (!*name) { - ovl_parse_param_drop_lowerdir(ctx); + ovl_reset_lowerdirs(ctx); + + if (!*name) return 0; - } if (*name == ':') { pr_err("cannot append lower layer"); return -EINVAL; } + // Store user provided lowerdir string to show in mount options + ctx->lowerdir_all = kstrdup(name, GFP_KERNEL); + if (!ctx->lowerdir_all) + return -ENOMEM; + dup = kstrdup(name, GFP_KERNEL); if (!dup) return -ENOMEM; @@ -385,36 +463,11 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) if (nr_lower < 0) goto out_err; - if ((nr_lower > OVL_MAX_STACK) || - (append && (size_add(ctx->nr, nr_lower) > OVL_MAX_STACK))) { + if (nr_lower > OVL_MAX_STACK) { pr_err("too many lower directories, limit is %d\n", OVL_MAX_STACK); goto out_err; } - if (!append) - ovl_parse_param_drop_lowerdir(ctx); - - /* - * (1) append - * - * We want nr <= nr_lower <= capacity We know nr > 0 and nr <= - * capacity. If nr == 0 this wouldn't be append. If nr + - * nr_lower is <= capacity then nr <= nr_lower <= capacity - * already holds. If nr + nr_lower exceeds capacity, we realloc. - * - * (2) replace - * - * Ensure we're backwards compatible with mount(2) which allows - * "lowerdir=/a:/b:/c,lowerdir=/d:/e:/f" causing the last - * specified lowerdir mount option to win. - * - * We want nr <= nr_lower <= capacity We know either (i) nr == 0 - * or (ii) nr > 0. We also know nr_lower > 0. The capacity - * could've been changed multiple times already so we only know - * nr <= capacity. If nr + nr_lower > capacity we realloc, - * otherwise nr <= nr_lower <= capacity holds already. - */ - nr_lower += ctx->nr; if (nr_lower > ctx->capacity) { err = -ENOMEM; l = krealloc_array(ctx->lower, nr_lower, sizeof(*ctx->lower), @@ -426,41 +479,21 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) ctx->capacity = nr_lower; } - /* - * (3) By (1) and (2) we know nr <= nr_lower <= capacity. - * (4) If ctx->nr == 0 => replace - * We have verified above that the lowerdir mount option - * isn't an append, i.e., the lowerdir mount option - * doesn't start with ":" or "::". - * (4.1) The lowerdir mount options only contains regular lower - * layers ":". - * => Nothing to verify. - * (4.2) The lowerdir mount options contains regular ":" and - * data "::" layers. - * => We need to verify that data lower layers "::" aren't - * followed by regular ":" lower layers - * (5) If ctx->nr > 0 => append - * We know that there's at least one regular layer - * otherwise we would've failed when parsing the previous - * lowerdir mount option. 
- * (5.1) The lowerdir mount option is a regular layer ":" append - * => We need to verify that no data layers have been - * specified before. - * (5.2) The lowerdir mount option is a data layer "::" append - * We know that there's at least one regular layer or - * other data layers. => There's nothing to verify. - */ - dup_iter = dup; - for (nr = ctx->nr; nr < nr_lower; nr++) { - l = &ctx->lower[nr]; + iter = dup; + l = ctx->lower; + for (nr = 0; nr < nr_lower; nr++, l++) { memset(l, 0, sizeof(*l)); - err = ovl_mount_dir(dup_iter, &l->path, false); + err = ovl_mount_dir(iter, &l->path); + if (err) + goto out_put; + + err = ovl_mount_dir_check(fc, &l->path, Opt_lowerdir, iter, false); if (err) goto out_put; err = -ENOMEM; - l->name = kstrdup(dup_iter, GFP_KERNEL_ACCOUNT); + l->name = kstrdup(iter, GFP_KERNEL_ACCOUNT); if (!l->name) goto out_put; @@ -472,8 +505,8 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) break; err = -EINVAL; - dup_iter = strchr(dup_iter, '\0') + 1; - if (*dup_iter) { + iter = strchr(iter, '\0') + 1; + if (*iter) { /* * This is a regular layer so we require that * there are no data layers. @@ -489,7 +522,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) /* This is a data lower layer. */ data_layer = true; - dup_iter++; + iter++; } ctx->nr = nr_lower; ctx->nr_data += nr_data; @@ -497,21 +530,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) return 0; out_put: - /* - * We know nr >= ctx->nr < nr_lower. If we failed somewhere - * we want to undo until nr == ctx->nr. This is correct for - * both ctx->nr == 0 and ctx->nr > 0. - */ - for (; nr >= ctx->nr; nr--) { - l = &ctx->lower[nr]; - kfree(l->name); - l->name = NULL; - path_put(&l->path); - - /* don't overflow */ - if (nr == 0) - break; - } + ovl_reset_lowerdirs(ctx); out_err: kfree(dup); @@ -556,11 +575,11 @@ static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_lowerdir: err = ovl_parse_param_lowerdir(param->string, fc); break; + case Opt_lowerdir_add: + case Opt_datadir_add: case Opt_upperdir: - fallthrough; case Opt_workdir: - err = ovl_parse_param_upperdir(param->string, fc, - (Opt_workdir == opt)); + err = ovl_parse_layer(fc, param, opt); break; case Opt_default_permissions: config->default_permissions = true; @@ -617,7 +636,7 @@ static int ovl_get_tree(struct fs_context *fc) static inline void ovl_fs_context_free(struct ovl_fs_context *ctx) { - ovl_parse_param_drop_lowerdir(ctx); + ovl_reset_lowerdirs(ctx); path_put(&ctx->upper); path_put(&ctx->work); kfree(ctx->lower); @@ -933,23 +952,28 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry) { struct super_block *sb = dentry->d_sb; struct ovl_fs *ofs = OVL_FS(sb); - size_t nr, nr_merged_lower = ofs->numlayer - ofs->numdatalayer; + size_t nr, nr_merged_lower, nr_lower = 0; + char **lowerdirs = ofs->config.lowerdirs; /* - * lowerdirs[] starts from offset 1, then - * >= 0 regular lower layers prefixed with : and - * >= 0 data-only lower layers prefixed with :: - * - * we need to escase comma and space like seq_show_option() does and - * we also need to escape the colon separator from lowerdir paths. + * lowerdirs[0] holds the colon separated list that user provided + * with lowerdir mount option. + * lowerdirs[1..numlayer] hold the lowerdir paths that were added + * using the lowerdir+ and datadir+ mount options. 
+ * For now, we do not allow mixing the legacy lowerdir mount option + * with the new lowerdir+ and datadir+ mount options. */ - seq_puts(m, ",lowerdir="); - for (nr = 1; nr < ofs->numlayer; nr++) { - if (nr > 1) - seq_putc(m, ':'); - if (nr >= nr_merged_lower) - seq_putc(m, ':'); - seq_escape(m, ofs->config.lowerdirs[nr], ":, \t\n\\"); + if (lowerdirs[0]) { + seq_show_option(m, "lowerdir", lowerdirs[0]); + } else { + nr_lower = ofs->numlayer; + nr_merged_lower = nr_lower - ofs->numdatalayer; + } + for (nr = 1; nr < nr_lower; nr++) { + if (nr < nr_merged_lower) + seq_show_option(m, "lowerdir+", lowerdirs[nr]); + else + seq_show_option(m, "datadir+", lowerdirs[nr]); } if (ofs->config.upperdir) { seq_show_option(m, "upperdir", ofs->config.upperdir); diff --git a/fs/overlayfs/params.h b/fs/overlayfs/params.h index 8750da68ab2a..c96d93982021 100644 --- a/fs/overlayfs/params.h +++ b/fs/overlayfs/params.h @@ -32,6 +32,7 @@ struct ovl_fs_context { size_t nr_data; struct ovl_opt_set set; struct ovl_fs_context_layer *lower; + char *lowerdir_all; /* user provided lowerdir string */ }; int ovl_init_fs_context(struct fs_context *fc); diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index de39e067ae65..a490fc47c3e7 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -25,6 +25,7 @@ struct ovl_cache_entry { struct ovl_cache_entry *next_maybe_whiteout; bool is_upper; bool is_whiteout; + bool check_xwhiteout; char name[]; }; @@ -47,6 +48,7 @@ struct ovl_readdir_data { int err; bool is_upper; bool d_type_supported; + bool in_xwhiteouts_dir; }; struct ovl_dir_file { @@ -162,6 +164,8 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd, p->ino = 0; p->is_upper = rdd->is_upper; p->is_whiteout = false; + /* Defer check for overlay.whiteout to ovl_iterate() */ + p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG; if (d_type == DT_CHR) { p->next_maybe_whiteout = rdd->first_maybe_whiteout; @@ -301,6 +305,8 @@ static inline int ovl_dir_read(const struct path *realpath, if (IS_ERR(realfile)) return PTR_ERR(realfile); + rdd->in_xwhiteouts_dir = rdd->dentry && + ovl_path_check_xwhiteouts_xattr(OVL_FS(rdd->dentry->d_sb), realpath); rdd->first_maybe_whiteout = NULL; rdd->ctx.pos = 0; do { @@ -447,7 +453,7 @@ static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid, } /* - * Set d_ino for upper entries. Non-upper entries should always report + * Set d_ino for upper entries if needed. Non-upper entries should always report * the uppermost real inode ino and should not call this function. * * When not all layer are on same fs, report real ino also for upper. @@ -455,8 +461,11 @@ static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid, * When all layers are on the same fs, and upper has a reference to * copy up origin, call vfs_getattr() on the overlay entry to make * sure that d_ino will be consistent with st_ino from stat(2). + * + * Also checks the overlay.whiteout xattr by doing a full lookup which will return + * negative in this case. 
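An xwhiteout, per the checks added in util.c later in this series, is a zero-size regular file carrying the overlay.whiteout xattr, inside a directory carrying the overlay.whiteouts xattr that the readdir code above uses as a hint. Layer-preparation tools can therefore express whiteouts without mknod. A hedged userspace sketch (make_xwhiteout is hypothetical; trusted.* names require CAP_SYS_ADMIN, and with the userxattr mount option the user.overlay.* names would apply instead):

        #include <fcntl.h>
        #include <sys/xattr.h>
        #include <unistd.h>

        /* Hypothetical helper: mark "file" inside "dir" as an xwhiteout. */
        static int make_xwhiteout(const char *dir, const char *file)
        {
                int fd = open(file, O_CREAT | O_EXCL | O_WRONLY, 0000);

                if (fd < 0)
                        return -1;
                close(fd);                      /* the file stays zero-size */

                /* Per-file whiteout marker... */
                if (setxattr(file, "trusted.overlay.whiteout", "", 0, 0) < 0)
                        return -1;
                /* ...and the per-directory hint that readdir checks first. */
                return setxattr(dir, "trusted.overlay.whiteouts", "", 0, 0);
        }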
*/ -static int ovl_cache_update_ino(const struct path *path, struct ovl_cache_entry *p) +static int ovl_cache_update(const struct path *path, struct ovl_cache_entry *p, bool update_ino) { struct dentry *dir = path->dentry; @@ -467,7 +476,7 @@ static int ovl_cache_update_ino(const struct path *path, struct ovl_cache_entry int xinobits = ovl_xino_bits(ofs); int err = 0; - if (!ovl_same_dev(ofs)) + if (!ovl_same_dev(ofs) && !p->check_xwhiteout) goto out; if (p->name[0] == '.') { @@ -481,6 +490,7 @@ static int ovl_cache_update_ino(const struct path *path, struct ovl_cache_entry goto get; } } + /* This checks also for xwhiteouts */ this = lookup_one(mnt_idmap(path->mnt), p->name, dir, p->len); if (IS_ERR_OR_NULL(this) || !this->d_inode) { /* Mark a stale entry */ @@ -494,6 +504,9 @@ static int ovl_cache_update_ino(const struct path *path, struct ovl_cache_entry } get: + if (!ovl_same_dev(ofs) || !update_ino) + goto out; + type = ovl_path_type(this); if (OVL_TYPE_ORIGIN(type)) { struct kstat stat; @@ -572,7 +585,7 @@ static int ovl_dir_read_impure(const struct path *path, struct list_head *list, list_for_each_entry_safe(p, n, list, l_node) { if (strcmp(p->name, ".") != 0 && strcmp(p->name, "..") != 0) { - err = ovl_cache_update_ino(path, p); + err = ovl_cache_update(path, p, true); if (err) return err; } @@ -778,13 +791,13 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) while (od->cursor != &od->cache->entries) { p = list_entry(od->cursor, struct ovl_cache_entry, l_node); if (!p->is_whiteout) { - if (!p->ino) { - err = ovl_cache_update_ino(&file->f_path, p); + if (!p->ino || p->check_xwhiteout) { + err = ovl_cache_update(&file->f_path, p, !p->ino); if (err) goto out; } } - /* ovl_cache_update_ino() sets is_whiteout on stale entry */ + /* ovl_cache_update() sets is_whiteout on stale entry */ if (!p->is_whiteout) { if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) break; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 17864a8d2b85..a0967bb25003 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -445,68 +445,6 @@ static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) return ok; } -static int ovl_own_xattr_get(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, void *buffer, size_t size) -{ - return -EOPNOTSUPP; -} - -static int ovl_own_xattr_set(const struct xattr_handler *handler, - struct mnt_idmap *idmap, - struct dentry *dentry, struct inode *inode, - const char *name, const void *value, - size_t size, int flags) -{ - return -EOPNOTSUPP; -} - -static int ovl_other_xattr_get(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, void *buffer, size_t size) -{ - return ovl_xattr_get(dentry, inode, name, buffer, size); -} - -static int ovl_other_xattr_set(const struct xattr_handler *handler, - struct mnt_idmap *idmap, - struct dentry *dentry, struct inode *inode, - const char *name, const void *value, - size_t size, int flags) -{ - return ovl_xattr_set(dentry, inode, name, value, size, flags); -} - -static const struct xattr_handler ovl_own_trusted_xattr_handler = { - .prefix = OVL_XATTR_TRUSTED_PREFIX, - .get = ovl_own_xattr_get, - .set = ovl_own_xattr_set, -}; - -static const struct xattr_handler ovl_own_user_xattr_handler = { - .prefix = OVL_XATTR_USER_PREFIX, - .get = ovl_own_xattr_get, - .set = ovl_own_xattr_set, -}; - -static const struct xattr_handler ovl_other_xattr_handler = { - .prefix = "", /* catch all */ - .get = 
ovl_other_xattr_get, - .set = ovl_other_xattr_set, -}; - -static const struct xattr_handler * const ovl_trusted_xattr_handlers[] = { - &ovl_own_trusted_xattr_handler, - &ovl_other_xattr_handler, - NULL -}; - -static const struct xattr_handler * const ovl_user_xattr_handlers[] = { - &ovl_own_user_xattr_handler, - &ovl_other_xattr_handler, - NULL -}; - static int ovl_setup_trap(struct super_block *sb, struct dentry *dir, struct inode **ptrap, const char *name) { @@ -647,7 +585,7 @@ static int ovl_check_rename_whiteout(struct ovl_fs *ofs) if (IS_ERR(whiteout)) goto cleanup_temp; - err = ovl_is_whiteout(whiteout); + err = ovl_upper_is_whiteout(ofs, whiteout); /* Best effort cleanup of whiteout and temp file */ if (err) @@ -887,15 +825,20 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, { struct vfsmount *mnt = ovl_upper_mnt(ofs); struct dentry *indexdir; + struct dentry *origin = ovl_lowerstack(oe)->dentry; + const struct ovl_fh *fh; int err; + fh = ovl_get_origin_fh(ofs, origin); + if (IS_ERR(fh)) + return PTR_ERR(fh); + err = mnt_want_write(mnt); if (err) - return err; + goto out_free_fh; /* Verify lower root is upper root origin */ - err = ovl_verify_origin(ofs, upperpath->dentry, - ovl_lowerstack(oe)->dentry, true); + err = ovl_verify_origin_fh(ofs, upperpath->dentry, fh, true); if (err) { pr_err("failed to verify upper root origin\n"); goto out; @@ -927,9 +870,10 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, * directory entries. */ if (ovl_check_origin_xattr(ofs, ofs->indexdir)) { - err = ovl_verify_set_fh(ofs, ofs->indexdir, - OVL_XATTR_ORIGIN, - upperpath->dentry, true, false); + err = ovl_verify_origin_xattr(ofs, ofs->indexdir, + OVL_XATTR_ORIGIN, + upperpath->dentry, true, + false); if (err) pr_err("failed to verify index dir 'origin' xattr\n"); } @@ -947,6 +891,8 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, out: mnt_drop_write(mnt); +out_free_fh: + kfree(fh); return err; } @@ -1382,8 +1328,11 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc) ofs->layers = layers; /* * Layer 0 is reserved for upper even if there's no upper. - * For consistency, config.lowerdirs[0] is NULL. + * config.lowerdirs[0] is used for storing the user provided colon + * separated lowerdir string. */ + ofs->config.lowerdirs[0] = ctx->lowerdir_all; + ctx->lowerdir_all = NULL; ofs->numlayer = 1; sb->s_stack_depth = 0; @@ -1493,8 +1442,7 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc) cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); sb->s_magic = OVERLAYFS_SUPER_MAGIC; - sb->s_xattr = ofs->config.userxattr ? 
ovl_user_xattr_handlers : - ovl_trusted_xattr_handlers; + sb->s_xattr = ovl_xattr_handlers(ofs); sb->s_fs_info = ofs; #ifdef CONFIG_FS_POSIX_ACL sb->s_flags |= SB_POSIXACL; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 868afd8834c3..50a201e9cd39 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -17,12 +17,38 @@ #include <linux/ratelimit.h> #include "overlayfs.h" +/* Get write access to upper mnt - may fail if upper sb was remounted ro */ +int ovl_get_write_access(struct dentry *dentry) +{ + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + return mnt_get_write_access(ovl_upper_mnt(ofs)); +} + +/* Get write access to upper sb - may block if upper sb is frozen */ +void ovl_start_write(struct dentry *dentry) +{ + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + sb_start_write(ovl_upper_mnt(ofs)->mnt_sb); +} + int ovl_want_write(struct dentry *dentry) { struct ovl_fs *ofs = OVL_FS(dentry->d_sb); return mnt_want_write(ovl_upper_mnt(ofs)); } +void ovl_put_write_access(struct dentry *dentry) +{ + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + mnt_put_write_access(ovl_upper_mnt(ofs)); +} + +void ovl_end_write(struct dentry *dentry) +{ + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + sb_end_write(ovl_upper_mnt(ofs)->mnt_sb); +} + void ovl_drop_write(struct dentry *dentry) { struct ovl_fs *ofs = OVL_FS(dentry->d_sb); @@ -55,7 +81,7 @@ int ovl_can_decode_fh(struct super_block *sb) if (!capable(CAP_DAC_READ_SEARCH)) return 0; - if (!sb->s_export_op || !sb->s_export_op->fh_to_dentry) + if (!exportfs_can_decode_fh(sb->s_export_op)) return 0; return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN; @@ -575,6 +601,16 @@ bool ovl_is_whiteout(struct dentry *dentry) return inode && IS_WHITEOUT(inode); } +/* + * Use this over ovl_is_whiteout for upper and lower files, as it also + * handles overlay.whiteout xattr whiteout files. + */ +bool ovl_path_is_whiteout(struct ovl_fs *ofs, const struct path *path) +{ + return ovl_is_whiteout(path->dentry) || + ovl_path_check_xwhiteout_xattr(ofs, path); +} + struct file *ovl_path_open(const struct path *path, int flags) { struct inode *inode = d_inode(path->dentry); @@ -644,22 +680,36 @@ bool ovl_already_copied_up(struct dentry *dentry, int flags) return false; } +/* + * The copy up "transaction" keeps an elevated mnt write count on upper mnt, + * but leaves taking freeze protection on upper sb to lower level helpers. 
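The new util.c helpers above split apart what ovl_want_write() takes in one go: write access to the upper mount (which fails if the upper fs was remounted read-only) and freeze protection on the upper superblock (which may block on a frozen fs). Conceptually, the composed helper looks like this (simplified from fs/namespace.c, not the verbatim body):

        int mnt_want_write(struct vfsmount *mnt)
        {
                int ret;

                sb_start_write(mnt->mnt_sb);            /* freeze protection, may block */
                ret = mnt_get_write_access(mnt);        /* fails on r/o remount */
                if (ret)
                        sb_end_write(mnt->mnt_sb);
                return ret;
        }

Holding only the mnt write count across the copy up "transaction", while taking freeze protection in short sections, is what lets the lower-level helpers above cycle sb_start_write()/sb_end_write() per operation.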
+ */ int ovl_copy_up_start(struct dentry *dentry, int flags) { struct inode *inode = d_inode(dentry); int err; err = ovl_inode_lock_interruptible(inode); - if (!err && ovl_already_copied_up_locked(dentry, flags)) { + if (err) + return err; + + if (ovl_already_copied_up_locked(dentry, flags)) err = 1; /* Already copied up */ - ovl_inode_unlock(inode); - } + else + err = ovl_get_write_access(dentry); + if (err) + goto out_unlock; + + return 0; +out_unlock: + ovl_inode_unlock(inode); return err; } void ovl_copy_up_end(struct dentry *dentry) { + ovl_put_write_access(dentry); ovl_inode_unlock(d_inode(dentry)); } @@ -676,6 +726,32 @@ bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path) return false; } +bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path) +{ + struct dentry *dentry = path->dentry; + int res; + + /* xattr.whiteout must be a zero size regular file */ + if (!d_is_reg(dentry) || i_size_read(d_inode(dentry)) != 0) + return false; + + res = ovl_path_getxattr(ofs, path, OVL_XATTR_XWHITEOUT, NULL, 0); + return res >= 0; +} + +bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const struct path *path) +{ + struct dentry *dentry = path->dentry; + int res; + + /* xattr.whiteouts must be a directory */ + if (!d_is_dir(dentry)) + return false; + + res = ovl_path_getxattr(ofs, path, OVL_XATTR_XWHITEOUTS, NULL, 0); + return res >= 0; +} + /* * Load persistent uuid from xattr into s_uuid if found, or store a new * random generated value in s_uuid and in xattr. @@ -760,6 +836,8 @@ bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path, #define OVL_XATTR_UUID_POSTFIX "uuid" #define OVL_XATTR_METACOPY_POSTFIX "metacopy" #define OVL_XATTR_PROTATTR_POSTFIX "protattr" +#define OVL_XATTR_XWHITEOUT_POSTFIX "whiteout" +#define OVL_XATTR_XWHITEOUTS_POSTFIX "whiteouts" #define OVL_XATTR_TAB_ENTRY(x) \ [x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \ @@ -775,6 +853,8 @@ const char *const ovl_xattr_table[][2] = { OVL_XATTR_TAB_ENTRY(OVL_XATTR_UUID), OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY), OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR), + OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUT), + OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUTS), }; int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry, @@ -973,12 +1053,18 @@ static void ovl_cleanup_index(struct dentry *dentry) struct dentry *index = NULL; struct inode *inode; struct qstr name = { }; + bool got_write = false; int err; err = ovl_get_index_name(ofs, lowerdentry, &name); if (err) goto fail; + err = ovl_want_write(dentry); + if (err) + goto fail; + + got_write = true; inode = d_inode(upperdentry); if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) { pr_warn_ratelimited("cleanup linked index (%pd2, ino=%lu, nlink=%u)\n", @@ -1016,6 +1102,8 @@ static void ovl_cleanup_index(struct dentry *dentry) goto fail; out: + if (got_write) + ovl_drop_write(dentry); kfree(name.name); dput(index); return; @@ -1062,8 +1150,12 @@ int ovl_nlink_start(struct dentry *dentry) if (err) return err; + err = ovl_want_write(dentry); + if (err) + goto out_unlock; + if (d_is_dir(dentry) || !ovl_test_flag(OVL_INDEX, inode)) - goto out; + return 0; old_cred = ovl_override_creds(dentry->d_sb); /* @@ -1074,10 +1166,15 @@ int ovl_nlink_start(struct dentry *dentry) */ err = ovl_set_nlink_upper(dentry); revert_creds(old_cred); - -out: if (err) - ovl_inode_unlock(inode); + goto out_drop_write; + + return 0; + +out_drop_write: + ovl_drop_write(dentry); +out_unlock: + ovl_inode_unlock(inode); return err; } @@ 
-1086,6 +1183,8 @@ void ovl_nlink_end(struct dentry *dentry) { struct inode *inode = d_inode(dentry); + ovl_drop_write(dentry); + if (ovl_test_flag(OVL_INDEX, inode) && inode->i_nlink == 0) { const struct cred *old_cred; @@ -1403,6 +1502,7 @@ void ovl_copyattr(struct inode *inode) realinode = ovl_i_path_real(inode, &realpath); real_idmap = mnt_idmap(realpath.mnt); + spin_lock(&inode->i_lock); vfsuid = i_uid_into_vfsuid(real_idmap, realinode); vfsgid = i_gid_into_vfsgid(real_idmap, realinode); @@ -1413,4 +1513,5 @@ void ovl_copyattr(struct inode *inode) inode_set_mtime_to_ts(inode, inode_get_mtime(realinode)); inode_set_ctime_to_ts(inode, inode_get_ctime(realinode)); i_size_write(inode, i_size_read(realinode)); + spin_unlock(&inode->i_lock); } diff --git a/fs/overlayfs/xattrs.c b/fs/overlayfs/xattrs.c new file mode 100644 index 000000000000..383978e4663c --- /dev/null +++ b/fs/overlayfs/xattrs.c @@ -0,0 +1,271 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/fs.h> +#include <linux/xattr.h> +#include "overlayfs.h" + +static bool ovl_is_escaped_xattr(struct super_block *sb, const char *name) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + if (ofs->config.userxattr) + return strncmp(name, OVL_XATTR_ESCAPE_USER_PREFIX, + OVL_XATTR_ESCAPE_USER_PREFIX_LEN) == 0; + else + return strncmp(name, OVL_XATTR_ESCAPE_TRUSTED_PREFIX, + OVL_XATTR_ESCAPE_TRUSTED_PREFIX_LEN - 1) == 0; +} + +static bool ovl_is_own_xattr(struct super_block *sb, const char *name) +{ + struct ovl_fs *ofs = OVL_FS(sb); + + if (ofs->config.userxattr) + return strncmp(name, OVL_XATTR_USER_PREFIX, + OVL_XATTR_USER_PREFIX_LEN) == 0; + else + return strncmp(name, OVL_XATTR_TRUSTED_PREFIX, + OVL_XATTR_TRUSTED_PREFIX_LEN) == 0; +} + +bool ovl_is_private_xattr(struct super_block *sb, const char *name) +{ + return ovl_is_own_xattr(sb, name) && !ovl_is_escaped_xattr(sb, name); +} + +static int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + int err; + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + struct dentry *upperdentry = ovl_i_dentry_upper(inode); + struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); + struct path realpath; + const struct cred *old_cred; + + if (!value && !upperdentry) { + ovl_path_lower(dentry, &realpath); + old_cred = ovl_override_creds(dentry->d_sb); + err = vfs_getxattr(mnt_idmap(realpath.mnt), realdentry, name, NULL, 0); + revert_creds(old_cred); + if (err < 0) + goto out; + } + + if (!upperdentry) { + err = ovl_copy_up(dentry); + if (err) + goto out; + + realdentry = ovl_dentry_upper(dentry); + } + + err = ovl_want_write(dentry); + if (err) + goto out; + + old_cred = ovl_override_creds(dentry->d_sb); + if (value) { + err = ovl_do_setxattr(ofs, realdentry, name, value, size, + flags); + } else { + WARN_ON(flags != XATTR_REPLACE); + err = ovl_do_removexattr(ofs, realdentry, name); + } + revert_creds(old_cred); + ovl_drop_write(dentry); + + /* copy c/mtime */ + ovl_copyattr(inode); +out: + return err; +} + +static int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, + void *value, size_t size) +{ + ssize_t res; + const struct cred *old_cred; + struct path realpath; + + ovl_i_path_real(inode, &realpath); + old_cred = ovl_override_creds(dentry->d_sb); + res = vfs_getxattr(mnt_idmap(realpath.mnt), realpath.dentry, name, value, size); + revert_creds(old_cred); + return res; +} + +static bool ovl_can_list(struct super_block *sb, const char *s) +{ + /* Never list private (.overlay) */ + if 
(ovl_is_private_xattr(sb, s)) + return false; + + /* List all non-trusted xattrs */ + if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) + return true; + + /* list other trusted for superuser only */ + return ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN); +} + +ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) +{ + struct dentry *realdentry = ovl_dentry_real(dentry); + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + ssize_t res; + size_t len; + char *s; + const struct cred *old_cred; + size_t prefix_len, name_len; + + old_cred = ovl_override_creds(dentry->d_sb); + res = vfs_listxattr(realdentry, list, size); + revert_creds(old_cred); + if (res <= 0 || size == 0) + return res; + + prefix_len = ofs->config.userxattr ? + OVL_XATTR_USER_PREFIX_LEN : OVL_XATTR_TRUSTED_PREFIX_LEN; + + /* filter out private xattrs */ + for (s = list, len = res; len;) { + size_t slen = strnlen(s, len) + 1; + + /* underlying fs providing us with a broken xattr list? */ + if (WARN_ON(slen > len)) + return -EIO; + + len -= slen; + if (!ovl_can_list(dentry->d_sb, s)) { + res -= slen; + memmove(s, s + slen, len); + } else if (ovl_is_escaped_xattr(dentry->d_sb, s)) { + res -= OVL_XATTR_ESCAPE_PREFIX_LEN; + name_len = slen - prefix_len - OVL_XATTR_ESCAPE_PREFIX_LEN; + s += prefix_len; + memmove(s, s + OVL_XATTR_ESCAPE_PREFIX_LEN, name_len + len); + s += name_len; + } else { + s += slen; + } + } + + return res; +} + +static char *ovl_xattr_escape_name(const char *prefix, const char *name) +{ + size_t prefix_len = strlen(prefix); + size_t name_len = strlen(name); + size_t escaped_len; + char *escaped, *s; + + escaped_len = prefix_len + OVL_XATTR_ESCAPE_PREFIX_LEN + name_len; + if (escaped_len > XATTR_NAME_MAX) + return ERR_PTR(-EOPNOTSUPP); + + escaped = kmalloc(escaped_len + 1, GFP_KERNEL); + if (escaped == NULL) + return ERR_PTR(-ENOMEM); + + s = escaped; + memcpy(s, prefix, prefix_len); + s += prefix_len; + memcpy(s, OVL_XATTR_ESCAPE_PREFIX, OVL_XATTR_ESCAPE_PREFIX_LEN); + s += OVL_XATTR_ESCAPE_PREFIX_LEN; + memcpy(s, name, name_len + 1); + + return escaped; +} + +static int ovl_own_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + char *escaped; + int r; + + escaped = ovl_xattr_escape_name(handler->prefix, name); + if (IS_ERR(escaped)) + return PTR_ERR(escaped); + + r = ovl_xattr_get(dentry, inode, escaped, buffer, size); + + kfree(escaped); + + return r; +} + +static int ovl_own_xattr_set(const struct xattr_handler *handler, + struct mnt_idmap *idmap, + struct dentry *dentry, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) +{ + char *escaped; + int r; + + escaped = ovl_xattr_escape_name(handler->prefix, name); + if (IS_ERR(escaped)) + return PTR_ERR(escaped); + + r = ovl_xattr_set(dentry, inode, escaped, value, size, flags); + + kfree(escaped); + + return r; +} + +static int ovl_other_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + return ovl_xattr_get(dentry, inode, name, buffer, size); +} + +static int ovl_other_xattr_set(const struct xattr_handler *handler, + struct mnt_idmap *idmap, + struct dentry *dentry, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) +{ + return ovl_xattr_set(dentry, inode, name, value, size, flags); +} + +static const struct xattr_handler ovl_own_trusted_xattr_handler = { + .prefix =
OVL_XATTR_TRUSTED_PREFIX, + .get = ovl_own_xattr_get, + .set = ovl_own_xattr_set, +}; + +static const struct xattr_handler ovl_own_user_xattr_handler = { + .prefix = OVL_XATTR_USER_PREFIX, + .get = ovl_own_xattr_get, + .set = ovl_own_xattr_set, +}; + +static const struct xattr_handler ovl_other_xattr_handler = { + .prefix = "", /* catch all */ + .get = ovl_other_xattr_get, + .set = ovl_other_xattr_set, +}; + +static const struct xattr_handler * const ovl_trusted_xattr_handlers[] = { + &ovl_own_trusted_xattr_handler, + &ovl_other_xattr_handler, + NULL +}; + +static const struct xattr_handler * const ovl_user_xattr_handlers[] = { + &ovl_own_user_xattr_handler, + &ovl_other_xattr_handler, + NULL +}; + +const struct xattr_handler * const *ovl_xattr_handlers(struct ovl_fs *ofs) +{ + return ofs->config.userxattr ? ovl_user_xattr_handlers : + ovl_trusted_xattr_handlers; +} + diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 6d8804fb6535..5596c9f30ccb 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -136,6 +136,11 @@ cifs_dump_channel(struct seq_file *m, int i, struct cifs_chan *chan) { struct TCP_Server_Info *server = chan->server; + if (!server) { + seq_printf(m, "\n\n\t\tChannel: %d DISABLED", i+1); + return; + } + seq_printf(m, "\n\n\t\tChannel: %d ConnectionId: 0x%llx" "\n\t\tNumber of credits: %d,%d,%d Dialect 0x%x" "\n\t\tTCP status: %d Instance: %d" @@ -279,6 +284,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) struct cifs_ses *ses; struct cifs_tcon *tcon; struct cifs_server_iface *iface; + size_t iface_weight = 0, iface_min_speed = 0; + struct cifs_server_iface *last_iface = NULL; int c, i, j; seq_puts(m, @@ -544,11 +551,25 @@ skip_rdma: "\tLast updated: %lu seconds ago", ses->iface_count, (jiffies - ses->iface_last_update) / HZ); + + last_iface = list_last_entry(&ses->iface_list, + struct cifs_server_iface, + iface_head); + iface_min_speed = last_iface->speed; + j = 0; list_for_each_entry(iface, &ses->iface_list, iface_head) { seq_printf(m, "\n\t%d)", ++j); cifs_dump_iface(m, iface); + + iface_weight = iface->speed / iface_min_speed; + seq_printf(m, "\t\tWeight (cur,total): (%zu,%zu)" + "\n\t\tAllocated channels: %u\n", + iface->weight_fulfilled, + iface_weight, + iface->num_channels); + if (is_ses_using_iface(ses, iface)) seq_puts(m, "\t\t[CONNECTED]\n"); } @@ -746,14 +767,14 @@ static ssize_t name##_write(struct file *file, const char __user *buffer, \ size_t count, loff_t *ppos) \ { \ int rc; \ - rc = kstrtoint_from_user(buffer, count, 10, & name); \ + rc = kstrtoint_from_user(buffer, count, 10, &name); \ if (rc) \ return rc; \ return count; \ } \ static int name##_proc_show(struct seq_file *m, void *v) \ { \ - seq_printf(m, "%d\n", name ); \ + seq_printf(m, "%d\n", name); \ return 0; \ } \ static int name##_open(struct inode *inode, struct file *file) \ diff --git a/fs/smb/client/cifs_ioctl.h b/fs/smb/client/cifs_ioctl.h index 332588e77c31..26327442e383 100644 --- a/fs/smb/client/cifs_ioctl.h +++ b/fs/smb/client/cifs_ioctl.h @@ -26,6 +26,11 @@ struct smb_mnt_fs_info { __u64 cifs_posix_caps; } __packed; +struct smb_mnt_tcon_info { + __u32 tid; + __u64 session_id; +} __packed; + struct smb_snapshot_array { __u32 number_of_snapshots; __u32 number_of_snapshots_returned; @@ -108,6 +113,7 @@ struct smb3_notify_info { #define CIFS_IOC_NOTIFY _IOW(CIFS_IOCTL_MAGIC, 9, struct smb3_notify) #define CIFS_DUMP_FULL_KEY _IOWR(CIFS_IOCTL_MAGIC, 10, struct smb3_full_key_debug_info) #define CIFS_IOC_NOTIFY_INFO 
_IOWR(CIFS_IOCTL_MAGIC, 11, struct smb3_notify_info) +#define CIFS_IOC_GET_TCON_INFO _IOR(CIFS_IOCTL_MAGIC, 12, struct smb_mnt_tcon_info) #define CIFS_IOC_SHUTDOWN _IOR('X', 125, __u32) /* diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h index 8ca3d7606bb4..3adea10aa9da 100644 --- a/fs/smb/client/cifsfs.h +++ b/fs/smb/client/cifsfs.h @@ -152,6 +152,6 @@ extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ /* when changing internal version - update following two lines at same time */ -#define SMB3_PRODUCT_BUILD 45 -#define CIFS_VERSION "2.45" +#define SMB3_PRODUCT_BUILD 46 +#define CIFS_VERSION "2.46" #endif /* _CIFSFS_H */ diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 02082621d8e0..6ffbd81bd109 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -650,6 +650,7 @@ struct TCP_Server_Info { bool noautotune; /* do not autotune send buf sizes */ bool nosharesock; bool tcp_nodelay; + bool terminate; unsigned int credits; /* send no more requests at once */ unsigned int max_credits; /* can override large 32000 default at mnt */ unsigned int in_flight; /* number of requests on the wire to server */ @@ -969,6 +970,8 @@ struct cifs_server_iface { struct list_head iface_head; struct kref refcount; size_t speed; + size_t weight_fulfilled; + unsigned int num_channels; unsigned int rdma_capable : 1; unsigned int rss_capable : 1; unsigned int is_active : 1; /* unset if non existent */ @@ -1050,6 +1053,7 @@ struct cifs_ses { spinlock_t chan_lock; /* ========= begin: protected by chan_lock ======== */ #define CIFS_MAX_CHANNELS 16 +#define CIFS_INVAL_CHAN_INDEX (-1) #define CIFS_ALL_CHANNELS_SET(ses) \ ((1UL << (ses)->chan_count) - 1) #define CIFS_ALL_CHANS_GOOD(ses) \ @@ -2143,6 +2147,7 @@ static inline int cifs_get_num_sgs(const struct smb_rqst *rqst, unsigned int len, skip; unsigned int nents = 0; unsigned long addr; + size_t data_size; int i, j; /* @@ -2158,17 +2163,21 @@ static inline int cifs_get_num_sgs(const struct smb_rqst *rqst, * rqst[1+].rq_iov[0+] data to be encrypted/decrypted */ for (i = 0; i < num_rqst; i++) { + data_size = iov_iter_count(&rqst[i].rq_iter); + /* We really don't want a mixture of pinned and unpinned pages * in the sglist. It's hard to keep track of which is what. * Instead, we convert to a BVEC-type iterator higher up. */ - if (WARN_ON_ONCE(user_backed_iter(&rqst[i].rq_iter))) + if (data_size && + WARN_ON_ONCE(user_backed_iter(&rqst[i].rq_iter))) return -EIO; /* We also don't want to have any extra refs or pins to clean * up in the sglist. 
*/ - if (WARN_ON_ONCE(iov_iter_extract_will_pin(&rqst[i].rq_iter))) + if (data_size && + WARN_ON_ONCE(iov_iter_extract_will_pin(&rqst[i].rq_iter))) return -EIO; for (j = 0; j < rqst[i].rq_nvec; j++) { @@ -2184,7 +2193,8 @@ static inline int cifs_get_num_sgs(const struct smb_rqst *rqst, } skip = 0; } - nents += iov_iter_npages(&rqst[i].rq_iter, INT_MAX); + if (data_size) + nents += iov_iter_npages(&rqst[i].rq_iter, INT_MAX); } nents += DIV_ROUND_UP(offset_in_page(sig) + SMB2_SIGNATURE_SIZE, PAGE_SIZE); return nents; diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 890ceddae07e..d87e2c26cce2 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -132,6 +132,7 @@ extern int SendReceiveBlockingLock(const unsigned int xid, struct smb_hdr *in_buf, struct smb_hdr *out_buf, int *bytes_returned); + void cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, bool all_channels); @@ -610,13 +611,13 @@ void cifs_free_hash(struct shash_desc **sdesc); struct cifs_chan * cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server); -int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses); +int cifs_try_adding_channels(struct cifs_ses *ses); bool is_server_using_iface(struct TCP_Server_Info *server, struct cifs_server_iface *iface); bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface); void cifs_ses_mark_for_reconnect(struct cifs_ses *ses); -unsigned int +int cifs_ses_get_chan_index(struct cifs_ses *ses, struct TCP_Server_Info *server); void @@ -640,6 +641,8 @@ cifs_chan_needs_reconnect(struct cifs_ses *ses, bool cifs_chan_is_iface_active(struct cifs_ses *ses, struct TCP_Server_Info *server); +void +cifs_disable_secondary_channels(struct cifs_ses *ses); int cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server); int diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 1a137b33858a..57c2a7df3457 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -132,6 +132,9 @@ static void smb2_query_server_interfaces(struct work_struct *work) free_xid(xid); if (rc) { + if (rc == -EOPNOTSUPP) + return; + cifs_dbg(FYI, "%s: failed to query server interfaces: %d\n", __func__, rc); } @@ -173,8 +176,12 @@ cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { spin_lock(&ses->chan_lock); for (i = 0; i < ses->chan_count; i++) { + if (!ses->chans[i].server) + continue; + spin_lock(&ses->chans[i].server->srv_lock); - ses->chans[i].server->tcpStatus = CifsNeedReconnect; + if (ses->chans[i].server->tcpStatus != CifsExiting) + ses->chans[i].server->tcpStatus = CifsNeedReconnect; spin_unlock(&ses->chans[i].server->srv_lock); } spin_unlock(&ses->chan_lock); @@ -212,6 +219,14 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, spin_lock(&cifs_tcp_ses_lock); list_for_each_entry_safe(ses, nses, &pserver->smb_ses_list, smb_ses_list) { + /* + * if channel has been marked for termination, nothing to do + * for the channel. in fact, we cannot find the channel for the + * server. 
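The weight bookkeeping printed by cifs_debug.c above and used by cifs_try_adding_channels() further down distributes channels across server interfaces in proportion to speed: each interface's weight is its speed divided by the slowest advertised speed, and an interface stops receiving channels once weight_fulfilled catches up with that weight. A standalone sketch of the allocation loop, with made-up speeds and the list kept fastest-first as the kernel keeps ses->iface_list:

#include <stdio.h>
#include <stddef.h>

struct iface {
	const char *name;
	size_t speed;			/* Mb/s, illustrative */
	size_t weight_fulfilled;
};

int main(void)
{
	/* the kernel keeps ses->iface_list sorted fastest-first */
	struct iface ifaces[] = {
		{ "A", 10000, 0 },
		{ "B", 10000, 0 },
		{ "C",  1000, 0 },
	};
	size_t n = sizeof(ifaces) / sizeof(ifaces[0]);
	size_t min_speed = ifaces[n - 1].speed;	/* slowest is last */
	int left = 8;				/* channels to open */

	while (left > 0) {
		int opened = 0;

		for (size_t i = 0; i < n && left > 0; i++) {
			size_t weight = ifaces[i].speed / min_speed;

			/* already carries its fair share? */
			if (ifaces[i].weight_fulfilled >= weight)
				continue;
			ifaces[i].weight_fulfilled++;
			left--;
			opened = 1;
			printf("channel %d -> iface %s\n", 8 - left, ifaces[i].name);
		}
		/* full pass without progress: reset and start over */
		if (!opened)
			for (size_t i = 0; i < n; i++)
				ifaces[i].weight_fulfilled = 0;
	}
	return 0;
}

With these speeds the 1 Gb/s interface gets one channel for every ten placed on each 10 Gb/s interface, which is exactly the "Weight (cur,total)" ratio the new debug output reports.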
So safe to exit here + */ + if (server->terminate) + break; + + /* check if iface is still active */ if (!cifs_chan_is_iface_active(ses, server)) cifs_chan_update_iface(ses, server); @@ -246,6 +261,8 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, spin_lock(&tcon->tc_lock); tcon->status = TID_NEED_RECON; spin_unlock(&tcon->tc_lock); + + cancel_delayed_work(&tcon->query_interfaces); } if (ses->tcon_ipc) { ses->tcon_ipc->need_reconnect = true; @@ -385,7 +402,13 @@ static int __cifs_reconnect(struct TCP_Server_Info *server, spin_unlock(&server->srv_lock); cifs_swn_reset_server_dstaddr(server); cifs_server_unlock(server); - mod_delayed_work(cifsiod_wq, &server->reconnect, 0); + + /* increase ref count which reconnect work will drop */ + spin_lock(&cifs_tcp_ses_lock); + server->srv_count++; + spin_unlock(&cifs_tcp_ses_lock); + if (mod_delayed_work(cifsiod_wq, &server->reconnect, 0)) + cifs_put_tcp_session(server, false); } } while (server->tcpStatus == CifsNeedReconnect); @@ -515,7 +538,13 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server) spin_unlock(&server->srv_lock); cifs_swn_reset_server_dstaddr(server); cifs_server_unlock(server); - mod_delayed_work(cifsiod_wq, &server->reconnect, 0); + + /* increase ref count which reconnect work will drop */ + spin_lock(&cifs_tcp_ses_lock); + server->srv_count++; + spin_unlock(&cifs_tcp_ses_lock); + if (mod_delayed_work(cifsiod_wq, &server->reconnect, 0)) + cifs_put_tcp_session(server, false); } while (server->tcpStatus == CifsNeedReconnect); mutex_lock(&server->refpath_lock); @@ -1597,16 +1626,19 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) cancel_delayed_work_sync(&server->echo); - if (from_reconnect) + if (from_reconnect) { /* * Avoid deadlock here: reconnect work calls * cifs_put_tcp_session() at its end. Need to be sure * that reconnect work does nothing with server pointer after * that step. */ - cancel_delayed_work(&server->reconnect); - else - cancel_delayed_work_sync(&server->reconnect); + if (cancel_delayed_work(&server->reconnect)) + cifs_put_tcp_session(server, from_reconnect); + } else { + if (cancel_delayed_work_sync(&server->reconnect)) + cifs_put_tcp_session(server, from_reconnect); + } spin_lock(&server->srv_lock); server->tcpStatus = CifsExiting; @@ -3560,7 +3592,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) ctx->prepath = NULL; out: - cifs_try_adding_channels(cifs_sb, mnt_ctx.ses); + cifs_try_adding_channels(mnt_ctx.ses); rc = mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon); if (rc) goto error; diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c index 81b84151450d..a8a1d386da65 100644 --- a/fs/smb/client/dfs.c +++ b/fs/smb/client/dfs.c @@ -263,15 +263,23 @@ out: return rc; } -/* Resolve UNC hostname in @ctx->source and set ip addr in @ctx->dstaddr */ +/* + * If @ctx->dfs_automount, then update @ctx->dstaddr earlier with the DFS root + * server from where we'll start following any referrals. Otherwise rely on the + * value provided by mount(2) as the user might not have the dns_resolver key + * set up and would therefore fail to upcall to resolve UNC hostname under @ctx->source.
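The new CIFS_IOC_GET_TCON_INFO ioctl defined earlier in cifs_ioctl.h (its handler follows in ioctl.c) is trivially exercised from userspace. A sketch, copying the struct layout from the patch and assuming CIFS_IOCTL_MAGIC is 0xCF as in cifs_ioctl.h; any file on a cifs mount works as the argument:

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

struct smb_mnt_tcon_info {
	uint32_t tid;
	uint64_t session_id;
} __attribute__((packed));

#define CIFS_IOCTL_MAGIC 0xCF	/* assumed, matching cifs_ioctl.h */
#define CIFS_IOC_GET_TCON_INFO \
	_IOR(CIFS_IOCTL_MAGIC, 12, struct smb_mnt_tcon_info)

int main(int argc, char **argv)
{
	struct smb_mnt_tcon_info info;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file-on-cifs-mount>\n", argv[0]);
		return 2;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0 || ioctl(fd, CIFS_IOC_GET_TCON_INFO, &info) < 0) {
		perror("CIFS_IOC_GET_TCON_INFO");
		return 1;
	}
	printf("tid=0x%x session_id=0x%llx\n",
	       info.tid, (unsigned long long)info.session_id);
	close(fd);
	return 0;
}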
+ */ static int update_fs_context_dstaddr(struct smb3_fs_context *ctx) { struct sockaddr *addr = (struct sockaddr *)&ctx->dstaddr; - int rc; + int rc = 0; - rc = dns_resolve_server_name_to_ip(ctx->source, addr, NULL); - if (!rc) - cifs_set_port(addr, ctx->port); + if (!ctx->nodfs && ctx->dfs_automount) { + rc = dns_resolve_server_name_to_ip(ctx->source, addr, NULL); + if (!rc) + cifs_set_port(addr, ctx->port); + ctx->dfs_automount = false; + } return rc; } diff --git a/fs/smb/client/export.c b/fs/smb/client/export.c index 37c28415df1e..d606e8cbcb7d 100644 --- a/fs/smb/client/export.c +++ b/fs/smb/client/export.c @@ -41,13 +41,12 @@ static struct dentry *cifs_get_parent(struct dentry *dentry) } const struct export_operations cifs_export_ops = { + .encode_fh = generic_encode_ino32_fh, .get_parent = cifs_get_parent, -/* Following five export operations are unneeded so far and can default: - .get_dentry = - .get_name = - .find_exported_dentry = - .decode_fh = - .encode_fs = */ +/* + * Following export operations are mandatory for NFS export support: + * .fh_to_dentry = + */ }; #endif /* CONFIG_CIFS_NFSD_EXPORT */ diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index 9d8d34af0211..cf46916286d0 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -268,6 +268,7 @@ struct smb3_fs_context { bool witness:1; /* use witness protocol */ char *leaf_fullpath; struct cifs_ses *dfs_root_ses; + bool dfs_automount:1; /* set for dfs automount only */ }; extern const struct fs_parameter_spec smb3_fs_parameters[]; diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index f7160003e0ed..e2f92c21fff5 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c @@ -117,6 +117,20 @@ out_drop_write: return rc; } +static long smb_mnt_get_tcon_info(struct cifs_tcon *tcon, void __user *arg) +{ + int rc = 0; + struct smb_mnt_tcon_info tcon_inf; + + tcon_inf.tid = tcon->tid; + tcon_inf.session_id = tcon->ses->Suid; + + if (copy_to_user(arg, &tcon_inf, sizeof(struct smb_mnt_tcon_info))) + rc = -EFAULT; + + return rc; +} + static long smb_mnt_get_fsinfo(unsigned int xid, struct cifs_tcon *tcon, void __user *arg) { @@ -129,6 +143,7 @@ static long smb_mnt_get_fsinfo(unsigned int xid, struct cifs_tcon *tcon, fsinf->version = 1; fsinf->protocol_id = tcon->ses->server->vals->protocol_id; + fsinf->tcon_flags = tcon->Flags; fsinf->device_characteristics = le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics); fsinf->device_type = le32_to_cpu(tcon->fsDevInfo.DeviceType); @@ -414,6 +429,17 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) tcon = tlink_tcon(pSMBFile->tlink); rc = smb_mnt_get_fsinfo(xid, tcon, (void __user *)arg); break; + case CIFS_IOC_GET_TCON_INFO: + cifs_sb = CIFS_SB(inode->i_sb); + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) { + rc = PTR_ERR(tlink); + break; + } + tcon = tlink_tcon(tlink); + rc = smb_mnt_get_tcon_info(tcon, (void __user *)arg); + cifs_put_tlink(tlink); + break; case CIFS_ENUMERATE_SNAPSHOTS: if (pSMBFile == NULL) break; diff --git a/fs/smb/client/namespace.c b/fs/smb/client/namespace.c index c8f5ed8a69f1..a6968573b775 100644 --- a/fs/smb/client/namespace.c +++ b/fs/smb/client/namespace.c @@ -117,6 +117,18 @@ cifs_build_devname(char *nodename, const char *prepath) return dev; } +static bool is_dfs_mount(struct dentry *dentry) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + bool ret; + + spin_lock(&tcon->tc_lock); + ret = 
!!tcon->origin_fullpath; + spin_unlock(&tcon->tc_lock); + return ret; +} + /* Return full path out of a dentry set for automount */ static char *automount_fullpath(struct dentry *dentry, void *page) { @@ -212,8 +224,9 @@ static struct vfsmount *cifs_do_automount(struct path *path) ctx->source = NULL; goto out; } - cifs_dbg(FYI, "%s: ctx: source=%s UNC=%s prepath=%s\n", - __func__, ctx->source, ctx->UNC, ctx->prepath); + ctx->dfs_automount = is_dfs_mount(mntpt); + cifs_dbg(FYI, "%s: ctx: source=%s UNC=%s prepath=%s dfs_automount=%d\n", + __func__, ctx->source, ctx->UNC, ctx->prepath, ctx->dfs_automount); mnt = fc_mount(fc); out: diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index cd474cf98f30..0bb2ac929061 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -24,7 +24,7 @@ #include "fs_context.h" static int -cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, +cifs_ses_add_channel(struct cifs_ses *ses, struct cifs_server_iface *iface); bool @@ -69,7 +69,7 @@ bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface) /* channel helper functions. assumed that chan_lock is held by caller. */ -unsigned int +int cifs_ses_get_chan_index(struct cifs_ses *ses, struct TCP_Server_Info *server) { @@ -85,14 +85,17 @@ cifs_ses_get_chan_index(struct cifs_ses *ses, cifs_dbg(VFS, "unable to get chan index for server: 0x%llx", server->conn_id); WARN_ON(1); - return 0; + return CIFS_INVAL_CHAN_INDEX; } void cifs_chan_set_in_reconnect(struct cifs_ses *ses, struct TCP_Server_Info *server) { - unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + int chan_index = cifs_ses_get_chan_index(ses, server); + + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return; ses->chans[chan_index].in_reconnect = true; } @@ -103,6 +106,9 @@ cifs_chan_clear_in_reconnect(struct cifs_ses *ses, { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return; + ses->chans[chan_index].in_reconnect = false; } @@ -112,6 +118,9 @@ cifs_chan_in_reconnect(struct cifs_ses *ses, { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return true; /* err on the safer side */ + return CIFS_CHAN_IN_RECONNECT(ses, chan_index); } @@ -121,6 +130,9 @@ cifs_chan_set_need_reconnect(struct cifs_ses *ses, { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return; + set_bit(chan_index, &ses->chans_need_reconnect); cifs_dbg(FYI, "Set reconnect bitmask for chan %u; now 0x%lx\n", chan_index, ses->chans_need_reconnect); @@ -132,6 +144,9 @@ cifs_chan_clear_need_reconnect(struct cifs_ses *ses, { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return; + clear_bit(chan_index, &ses->chans_need_reconnect); cifs_dbg(FYI, "Cleared reconnect bitmask for chan %u; now 0x%lx\n", chan_index, ses->chans_need_reconnect); @@ -143,6 +158,9 @@ cifs_chan_needs_reconnect(struct cifs_ses *ses, { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return true; /* err on the safer side */ + return CIFS_CHAN_NEEDS_RECONNECT(ses, chan_index); } @@ -152,19 +170,24 @@ cifs_chan_is_iface_active(struct cifs_ses *ses, { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return true; /* err on the safer side */ + return ses->chans[chan_index].iface && 
ses->chans[chan_index].iface->is_active; } /* returns number of channels added */ -int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) +int cifs_try_adding_channels(struct cifs_ses *ses) { struct TCP_Server_Info *server = ses->server; int old_chan_count, new_chan_count; int left; int rc = 0; int tries = 0; + size_t iface_weight = 0, iface_min_speed = 0; struct cifs_server_iface *iface = NULL, *niface = NULL; + struct cifs_server_iface *last_iface = NULL; spin_lock(&ses->chan_lock); @@ -192,21 +215,11 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) } spin_unlock(&ses->chan_lock); - /* - * Keep connecting to same, fastest, iface for all channels as - * long as its RSS. Try next fastest one if not RSS or channel - * creation fails. - */ - spin_lock(&ses->iface_lock); - iface = list_first_entry(&ses->iface_list, struct cifs_server_iface, - iface_head); - spin_unlock(&ses->iface_lock); - while (left > 0) { tries++; if (tries > 3*ses->chan_max) { - cifs_dbg(FYI, "too many channel open attempts (%d channels left to open)\n", + cifs_dbg(VFS, "too many channel open attempts (%d channels left to open)\n", left); break; } @@ -214,23 +227,41 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) spin_lock(&ses->iface_lock); if (!ses->iface_count) { spin_unlock(&ses->iface_lock); + cifs_dbg(VFS, "server %s does not advertise interfaces\n", + ses->server->hostname); break; } + if (!iface) + iface = list_first_entry(&ses->iface_list, struct cifs_server_iface, + iface_head); + last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface, + iface_head); + iface_min_speed = last_iface->speed; + list_for_each_entry_safe_from(iface, niface, &ses->iface_list, iface_head) { + /* do not mix rdma and non-rdma interfaces */ + if (iface->rdma_capable != ses->server->rdma) + continue; + /* skip ifaces that are unusable */ if (!iface->is_active || (is_ses_using_iface(ses, iface) && - !iface->rss_capable)) { + !iface->rss_capable)) + continue; + + /* check if we already allocated enough channels */ + iface_weight = iface->speed / iface_min_speed; + + if (iface->weight_fulfilled >= iface_weight) continue; - } /* take ref before unlock */ kref_get(&iface->refcount); spin_unlock(&ses->iface_lock); - rc = cifs_ses_add_channel(cifs_sb, ses, iface); + rc = cifs_ses_add_channel(ses, iface); spin_lock(&ses->iface_lock); if (rc) { @@ -241,10 +272,21 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) continue; } - cifs_dbg(FYI, "successfully opened new channel on iface:%pIS\n", + iface->num_channels++; + iface->weight_fulfilled++; + cifs_dbg(VFS, "successfully opened new channel on iface:%pIS\n", &iface->sockaddr); break; } + + /* reached end of list. reset weight_fulfilled and start over */ + if (list_entry_is_head(iface, &ses->iface_list, iface_head)) { + list_for_each_entry(iface, &ses->iface_list, iface_head) + iface->weight_fulfilled = 0; + spin_unlock(&ses->iface_lock); + iface = NULL; + continue; + } spin_unlock(&ses->iface_lock); left--; @@ -255,6 +297,60 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) } /* + * called when multichannel is disabled by the server. + * this always gets called from smb2_reconnect + * and cannot get called in parallel threads. 
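cifs_disable_secondary_channels() below walks each secondary channel, dropping the interface reference under iface_lock and only then terminating the server. The iface lifetime rests on the usual kref pattern: release_iface() runs only on the final put. A plain C analogue using stdatomic, not the kernel API, to show why the pairing of gets and puts is what keeps the teardown safe:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct iface {
	atomic_int refcount;
	int num_channels;
};

static void release_iface(struct iface *iface)
{
	printf("releasing iface\n");
	free(iface);
}

static void iface_get(struct iface *iface)
{
	atomic_fetch_add(&iface->refcount, 1);
}

static void iface_put(struct iface *iface)
{
	/* fetch_sub returns the old value: old == 1 means last ref */
	if (atomic_fetch_sub(&iface->refcount, 1) == 1)
		release_iface(iface);
}

int main(void)
{
	struct iface *iface = calloc(1, sizeof(*iface));

	atomic_init(&iface->refcount, 1);	/* the list's reference */
	iface_get(iface);			/* a channel's reference */
	iface_put(iface);			/* channel torn down */
	iface_put(iface);			/* removed from list: freed */
	return 0;
}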
+ */ +void +cifs_disable_secondary_channels(struct cifs_ses *ses) +{ + int i, chan_count; + struct TCP_Server_Info *server; + struct cifs_server_iface *iface; + + spin_lock(&ses->chan_lock); + chan_count = ses->chan_count; + if (chan_count == 1) + goto done; + + ses->chan_count = 1; + + /* for all secondary channels reset the need reconnect bit */ + ses->chans_need_reconnect &= 1; + + for (i = 1; i < chan_count; i++) { + iface = ses->chans[i].iface; + server = ses->chans[i].server; + + if (iface) { + spin_lock(&ses->iface_lock); + kref_put(&iface->refcount, release_iface); + ses->chans[i].iface = NULL; + iface->num_channels--; + if (iface->weight_fulfilled) + iface->weight_fulfilled--; + spin_unlock(&ses->iface_lock); + } + + spin_unlock(&ses->chan_lock); + if (server && !server->terminate) { + server->terminate = true; + cifs_signal_cifsd_for_reconnect(server, false); + } + spin_lock(&ses->chan_lock); + + if (server) { + ses->chans[i].server = NULL; + cifs_put_tcp_session(server, false); + } + + } + +done: + spin_unlock(&ses->chan_lock); +} + +/* * update the iface for the channel if necessary. * will return 0 when iface is updated, 1 if removed, 2 otherwise * Must be called with chan_lock held. @@ -263,13 +359,16 @@ int cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) { unsigned int chan_index; + size_t iface_weight = 0, iface_min_speed = 0; struct cifs_server_iface *iface = NULL; struct cifs_server_iface *old_iface = NULL; + struct cifs_server_iface *last_iface = NULL; + struct sockaddr_storage ss; int rc = 0; spin_lock(&ses->chan_lock); chan_index = cifs_ses_get_chan_index(ses, server); - if (!chan_index) { + if (chan_index == CIFS_INVAL_CHAN_INDEX) { spin_unlock(&ses->chan_lock); return 0; } @@ -283,14 +382,49 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) } spin_unlock(&ses->chan_lock); + spin_lock(&server->srv_lock); + ss = server->dstaddr; + spin_unlock(&server->srv_lock); + spin_lock(&ses->iface_lock); + if (!ses->iface_count) { + spin_unlock(&ses->iface_lock); + cifs_dbg(VFS, "server %s does not advertise interfaces\n", ses->server->hostname); + return 0; + } + + last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface, + iface_head); + iface_min_speed = last_iface->speed; + /* then look for a new one */ list_for_each_entry(iface, &ses->iface_list, iface_head) { + if (!chan_index) { + /* if we're trying to get the updated iface for primary channel */ + if (!cifs_match_ipaddr((struct sockaddr *) &ss, + (struct sockaddr *) &iface->sockaddr)) + continue; + + kref_get(&iface->refcount); + break; + } + + /* do not mix rdma and non-rdma interfaces */ + if (iface->rdma_capable != server->rdma) + continue; + if (!iface->is_active || (is_ses_using_iface(ses, iface) && !iface->rss_capable)) { continue; } + + /* check if we already allocated enough channels */ + iface_weight = iface->speed / iface_min_speed; + + if (iface->weight_fulfilled >= iface_weight) + continue; + kref_get(&iface->refcount); break; } @@ -301,16 +435,41 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) cifs_dbg(FYI, "unable to find a suitable iface\n"); } + if (!chan_index && !iface) { + cifs_dbg(FYI, "unable to get the interface matching: %pIS\n", + &ss); + spin_unlock(&ses->iface_lock); + return 0; + } + /* now drop the ref to the current iface */ if (old_iface && iface) { cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n", &old_iface->sockaddr, &iface->sockaddr); + + old_iface->num_channels--; + if 
(old_iface->weight_fulfilled) + old_iface->weight_fulfilled--; + iface->num_channels++; + iface->weight_fulfilled++; + kref_put(&old_iface->refcount, release_iface); } else if (old_iface) { cifs_dbg(FYI, "releasing ref to iface: %pIS\n", &old_iface->sockaddr); + + old_iface->num_channels--; + if (old_iface->weight_fulfilled) + old_iface->weight_fulfilled--; + kref_put(&old_iface->refcount, release_iface); + } else if (!chan_index) { + /* special case: update interface for primary channel */ + cifs_dbg(FYI, "referencing primary channel iface: %pIS\n", + &iface->sockaddr); + iface->num_channels++; + iface->weight_fulfilled++; } else { WARN_ON(!iface); cifs_dbg(FYI, "adding new iface: %pIS\n", &iface->sockaddr); @@ -319,6 +478,11 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) spin_lock(&ses->chan_lock); chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) { + spin_unlock(&ses->chan_lock); + return 0; + } + ses->chans[chan_index].iface = iface; /* No iface is found. if secondary chan, drop connection */ @@ -354,7 +518,7 @@ cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server) } static int -cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, +cifs_ses_add_channel(struct cifs_ses *ses, struct cifs_server_iface *iface) { struct TCP_Server_Info *chan_server; @@ -433,7 +597,7 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, * This will be used for encoding/decoding user/domain/pw * during sess setup auth. */ - ctx->local_nls = cifs_sb->local_nls; + ctx->local_nls = ses->local_nls; /* Use RDMA if possible */ ctx->rdma = iface->rdma_capable; @@ -479,20 +643,16 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, rc = cifs_negotiate_protocol(xid, ses, chan->server); if (!rc) - rc = cifs_setup_session(xid, ses, chan->server, cifs_sb->local_nls); + rc = cifs_setup_session(xid, ses, chan->server, ses->local_nls); mutex_unlock(&ses->session_mutex); out: if (rc && chan->server) { - /* - * we should avoid race with these delayed works before we - * remove this channel - */ - cancel_delayed_work_sync(&chan->server->echo); - cancel_delayed_work_sync(&chan->server->reconnect); + cifs_put_tcp_session(chan->server, 0); spin_lock(&ses->chan_lock); + /* we rely on all bits beyond chan_count to be clear */ cifs_chan_clear_need_reconnect(ses, chan->server); ses->chan_count--; @@ -502,8 +662,6 @@ out: */ WARN_ON(ses->chan_count < 1); spin_unlock(&ses->chan_lock); - - cifs_put_tcp_session(chan->server, 0); } kfree(ctx->UNC); @@ -535,8 +693,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, /* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */ - /* BB verify whether signing required on neg or just on auth frame - (and NTLM case) */ + /* BB verify whether signing required on neg or just auth frame (and NTLM case) */ capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS | CAP_LARGE_WRITE_X | CAP_LARGE_READ_X; @@ -593,8 +750,10 @@ static void unicode_domain_string(char **pbcc_area, struct cifs_ses *ses, /* copy domain */ if (ses->domainName == NULL) { - /* Sending null domain better than using a bogus domain name (as - we did briefly in 2.6.18) since server will use its default */ + /* + * Sending null domain better than using a bogus domain name (as + * we did briefly in 2.6.18) since server will use its default + */ *bcc_ptr = 0; *(bcc_ptr+1) = 0; bytes_ret = 0; @@ -613,8 +772,7 @@ static void unicode_ssetup_strings(char 
**pbcc_area, struct cifs_ses *ses, char *bcc_ptr = *pbcc_area; int bytes_ret = 0; - /* BB FIXME add check that strings total less - than 335 or will need to send them as arrays */ + /* BB FIXME add check that strings less than 335 or will need to send as arrays */ /* copy user */ if (ses->user_name == NULL) { @@ -659,8 +817,7 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, if (WARN_ON_ONCE(len < 0)) len = CIFS_MAX_DOMAINNAME_LEN - 1; bcc_ptr += len; - } /* else we will send a null domain name - so the server will default to its own domain */ + } /* else we send a null domain name so server will default to its own domain */ *bcc_ptr = 0; bcc_ptr++; @@ -756,11 +913,14 @@ static void decode_ascii_ssetup(char **pbcc_area, __u16 bleft, if (len > bleft) return; - /* No domain field in LANMAN case. Domain is - returned by old servers in the SMB negprot response */ - /* BB For newer servers which do not support Unicode, - but thus do return domain here we could add parsing - for it later, but it is not very important */ + /* + * No domain field in LANMAN case. Domain is + * returned by old servers in the SMB negprot response + * + * BB For newer servers which do not support Unicode, + * but thus do return domain here, we could add parsing + * for it later, but it is not very important + */ cifs_dbg(FYI, "ascii: bytes left %d\n", bleft); } #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ @@ -816,9 +976,12 @@ int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, ses->ntlmssp->server_flags = server_flags; memcpy(ses->ntlmssp->cryptkey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE); - /* In particular we can examine sign flags */ - /* BB spec says that if AvId field of MsvAvTimestamp is populated then - we must set the MIC field of the AUTHENTICATE_MESSAGE */ + /* + * In particular we can examine sign flags + * + * BB spec says that if AvId field of MsvAvTimestamp is populated then + * we must set the MIC field of the AUTHENTICATE_MESSAGE + */ tioffset = le32_to_cpu(pblob->TargetInfoArray.BufferOffset); tilen = le16_to_cpu(pblob->TargetInfoArray.Length); diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 601e7a187f87..a959ed2c9b22 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -756,6 +756,7 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon, bool in_ unsigned int ret_data_len = 0; struct network_interface_info_ioctl_rsp *out_buf = NULL; struct cifs_ses *ses = tcon->ses; + struct TCP_Server_Info *pserver; /* do not query too frequently */ if (ses->iface_last_update && @@ -780,6 +781,11 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon, bool in_ if (rc) goto out; + /* check if iface is still active */ + pserver = ses->chans[0].server; + if (pserver && !cifs_chan_is_iface_active(ses, pserver)) + cifs_chan_update_iface(ses, pserver); + out: kfree(out_buf); return rc; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index c75a80bb6d9e..2eb29fa278c3 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -163,6 +163,9 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, int rc = 0; struct nls_table *nls_codepage = NULL; struct cifs_ses *ses; + int xid; + struct TCP_Server_Info *pserver; + unsigned int chan_index; /* * SMB2s NegProt, SessSetup, Logoff do not have tcon yet so @@ -223,6 +226,12 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, return -EAGAIN; } } + + /* if server is marked for termination, cifsd will cleanup */ + if 
(server->terminate) { + spin_unlock(&server->srv_lock); + return -EHOSTDOWN; + } spin_unlock(&server->srv_lock); again: @@ -242,11 +251,23 @@ again: mutex_lock(&ses->session_mutex); /* + * if this is called by delayed work, and the channel has been disabled + * in parallel, the delayed work can continue to execute in parallel; + * there's a chance that this channel may not exist anymore + */ + spin_lock(&server->srv_lock); + if (server->tcpStatus == CifsExiting) { + spin_unlock(&server->srv_lock); + mutex_unlock(&ses->session_mutex); + rc = -EHOSTDOWN; + goto out; + } + + /* * Recheck after acquire mutex. If another thread is negotiating * and the server never sends an answer the socket will be closed * and tcpStatus set to reconnect. */ - spin_lock(&server->srv_lock); if (server->tcpStatus == CifsNeedReconnect) { spin_unlock(&server->srv_lock); mutex_unlock(&ses->session_mutex); @@ -283,6 +304,53 @@ again: rc = cifs_negotiate_protocol(0, ses, server); if (!rc) { + /* + * if server stopped supporting multichannel + * and the first channel reconnected, disable all the others. + */ + if (ses->chan_count > 1 && + !(server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { + if (SERVER_IS_CHAN(server)) { + cifs_dbg(VFS, "server %s does not support " \ + "multichannel anymore. skipping secondary channel\n", + ses->server->hostname); + + spin_lock(&ses->chan_lock); + chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) { + spin_unlock(&ses->chan_lock); + goto skip_terminate; + } + + ses->chans[chan_index].server = NULL; + spin_unlock(&ses->chan_lock); + + /* + * the above reference of server by channel + * needs to be dropped without holding chan_lock + * as cifs_put_tcp_session takes a higher lock + * i.e. cifs_tcp_ses_lock + */ + cifs_put_tcp_session(server, 1); + + server->terminate = true; + cifs_signal_cifsd_for_reconnect(server, false); + + /* mark primary server as needing reconnect */ + pserver = server->primary_server; + cifs_signal_cifsd_for_reconnect(pserver, false); + +skip_terminate: + mutex_unlock(&ses->session_mutex); + rc = -EHOSTDOWN; + goto out; + } else { + cifs_server_dbg(VFS, "does not support " \ + "multichannel anymore. disabling all other channels\n"); + cifs_disable_secondary_channels(ses); + } + } + rc = cifs_setup_session(0, ses, server, nls_codepage); if ((rc == -EACCES) && !tcon->retry) { mutex_unlock(&ses->session_mutex); @@ -307,17 +375,44 @@ skip_sess_setup: tcon->need_reopen_files = true; rc = cifs_tree_connect(0, tcon, nls_codepage); - mutex_unlock(&ses->session_mutex); cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc); if (rc) { /* If sess reconnected but tcon didn't, something strange ...
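Several hunks in this series (here in smb2_reconnect(), and earlier in __cifs_reconnect() and SMB2_echo()) repeat one idiom: bump server->srv_count before mod_delayed_work(), then give the reference straight back if the work was already pending, since the reconnect worker drops exactly one reference when it finishes. A self-contained sketch of that accounting, with mod_delayed_work() modeled only by its return convention (true when the work was already queued):

#include <stdbool.h>
#include <stdio.h>

struct server {
	int srv_count;
	bool work_pending;
};

static void put_server(struct server *s)
{
	if (--s->srv_count == 0)
		printf("last ref dropped, server freed\n");
}

/* returns true if the work was already queued (kernel semantics) */
static bool mod_delayed_work(struct server *s)
{
	bool was_pending = s->work_pending;

	s->work_pending = true;
	return was_pending;
}

static void schedule_reconnect(struct server *s)
{
	s->srv_count++;			/* ref the worker will drop */
	if (mod_delayed_work(s))
		put_server(s);		/* pending work already holds one */
}

static void reconnect_worker(struct server *s)
{
	s->work_pending = false;
	/* ... reconnect tcons and channels ... */
	put_server(s);			/* drop the scheduler's ref */
}

int main(void)
{
	struct server srv = { .srv_count = 1, .work_pending = false };

	schedule_reconnect(&srv);	/* queues work: count 1 -> 2 */
	schedule_reconnect(&srv);	/* already pending: count stays 2 */
	reconnect_worker(&srv);		/* count back to 1 */
	put_server(&srv);		/* final put: freed */
	return 0;
}

However often scheduling races with itself, the worker always ends up holding exactly one reference to drop, which is the invariant these hunks restore.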
*/ + mutex_unlock(&ses->session_mutex); cifs_dbg(VFS, "reconnect tcon failed rc = %d\n", rc); goto out; } + if (!rc && + (server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { + mutex_unlock(&ses->session_mutex); + + /* + * query server network interfaces, in case they change + */ + xid = get_xid(); + rc = SMB3_request_interfaces(xid, tcon, false); + free_xid(xid); + + if (rc) + cifs_dbg(FYI, "%s: failed to query server interfaces: %d\n", + __func__, rc); + + if (ses->chan_max > ses->chan_count && + !SERVER_IS_CHAN(server)) { + if (ses->chan_count == 1) + cifs_server_dbg(VFS, "supports multichannel now\n"); + + cifs_try_adding_channels(ses); + } + } else { + mutex_unlock(&ses->session_mutex); + } + if (smb2_command != SMB2_INTERNAL_CMD) - mod_delayed_work(cifsiod_wq, &server->reconnect, 0); + if (mod_delayed_work(cifsiod_wq, &server->reconnect, 0)) + cifs_put_tcp_session(server, false); atomic_inc(&tconInfoReconnectCount); out: @@ -3808,6 +3903,13 @@ void smb2_reconnect_server(struct work_struct *work) /* Prevent simultaneous reconnects that can corrupt tcon->rlist list */ mutex_lock(&pserver->reconnect_mutex); + /* if the server is marked for termination, drop the ref count here */ + if (server->terminate) { + cifs_put_tcp_session(server, true); + mutex_unlock(&pserver->reconnect_mutex); + return; + } + INIT_LIST_HEAD(&tmp_list); INIT_LIST_HEAD(&tmp_ses_list); cifs_dbg(FYI, "Reconnecting tcons and channels\n"); @@ -3852,12 +3954,6 @@ void smb2_reconnect_server(struct work_struct *work) } spin_unlock(&ses->chan_lock); } - /* - * Get the reference to server struct to be sure that the last call of - * cifs_put_tcon() in the loop below won't release the server pointer. - */ - if (tcon_exist || ses_exist) - server->srv_count++; spin_unlock(&cifs_tcp_ses_lock); @@ -3905,13 +4001,17 @@ void smb2_reconnect_server(struct work_struct *work) done: cifs_dbg(FYI, "Reconnecting tcons and channels finished\n"); - if (resched) + if (resched) { queue_delayed_work(cifsiod_wq, &server->reconnect, 2 * HZ); + mutex_unlock(&pserver->reconnect_mutex); + + /* no need to put tcp session as we're retrying */ + return; + } mutex_unlock(&pserver->reconnect_mutex); /* now we can safely release srv struct */ - if (tcon_exist || ses_exist) - cifs_put_tcp_session(server, 1); + cifs_put_tcp_session(server, true); } int @@ -3931,7 +4031,12 @@ SMB2_echo(struct TCP_Server_Info *server) server->ops->need_neg(server)) { spin_unlock(&server->srv_lock); /* No need to send echo on newly established connections */ - mod_delayed_work(cifsiod_wq, &server->reconnect, 0); + spin_lock(&cifs_tcp_ses_lock); + server->srv_count++; + spin_unlock(&cifs_tcp_ses_lock); + if (mod_delayed_work(cifsiod_wq, &server->reconnect, 0)) + cifs_put_tcp_session(server, false); + return rc; } spin_unlock(&server->srv_lock); diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 23c50ed7d4b5..84ea67301303 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -413,7 +413,13 @@ generate_smb3signingkey(struct cifs_ses *ses, ses->ses_status == SES_GOOD); chan_index = cifs_ses_get_chan_index(ses, server); - /* TODO: introduce ref counting for channels when the can be freed */ + if (chan_index == CIFS_INVAL_CHAN_INDEX) { + spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); + + return -EINVAL; + } + spin_unlock(&ses->chan_lock); spin_unlock(&ses->ses_lock); diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index d553b7a54621..4f717ad7c21b 100644 --- a/fs/smb/client/transport.c 
+++ b/fs/smb/client/transport.c @@ -1023,7 +1023,7 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) spin_lock(&ses->chan_lock); for (i = 0; i < ses->chan_count; i++) { server = ses->chans[i].server; - if (!server) + if (!server || server->terminate) continue; /* diff --git a/fs/smb/client/xattr.c b/fs/smb/client/xattr.c index ac199160bce6..6780aa3e98a1 100644 --- a/fs/smb/client/xattr.c +++ b/fs/smb/client/xattr.c @@ -150,10 +150,13 @@ static int cifs_xattr_set(const struct xattr_handler *handler, if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) goto out; - if (pTcon->ses->server->ops->set_EA) + if (pTcon->ses->server->ops->set_EA) { rc = pTcon->ses->server->ops->set_EA(xid, pTcon, full_path, name, value, (__u16)size, cifs_sb->local_nls, cifs_sb); + if (rc == 0) + inode_set_ctime_current(inode); + } break; case XATTR_CIFS_ACL: diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c index e6ba1e9b8589..6691ae68af0c 100644 --- a/fs/smb/server/smb_common.c +++ b/fs/smb/server/smb_common.c @@ -366,11 +366,22 @@ static int smb1_allocate_rsp_buf(struct ksmbd_work *work) return 0; } +/** + * set_smb1_rsp_status() - set error type in smb response header + * @work: smb work containing smb response header + * @err: error code to set in response + */ +static void set_smb1_rsp_status(struct ksmbd_work *work, __le32 err) +{ + work->send_no_response = 1; +} + static struct smb_version_ops smb1_server_ops = { .get_cmd_val = get_smb1_cmd_val, .init_rsp_hdr = init_smb1_rsp_hdr, .allocate_rsp_buf = smb1_allocate_rsp_buf, .check_user_session = smb1_check_user_session, + .set_rsp_status = set_smb1_rsp_status, }; static int smb1_negotiate(struct ksmbd_work *work) diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index 6c0305be895e..51b8bfab7481 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -1107,6 +1107,7 @@ pass: struct smb_acl *pdacl; struct smb_sid *powner_sid = NULL, *pgroup_sid = NULL; int powner_sid_size = 0, pgroup_sid_size = 0, pntsd_size; + int pntsd_alloc_size; if (parent_pntsd->osidoffset) { powner_sid = (struct smb_sid *)((char *)parent_pntsd + @@ -1119,9 +1120,10 @@ pass: pgroup_sid_size = 1 + 1 + 6 + (pgroup_sid->num_subauth * 4); } - pntsd = kzalloc(sizeof(struct smb_ntsd) + powner_sid_size + - pgroup_sid_size + sizeof(struct smb_acl) + - nt_size, GFP_KERNEL); + pntsd_alloc_size = sizeof(struct smb_ntsd) + powner_sid_size + + pgroup_sid_size + sizeof(struct smb_acl) + nt_size; + + pntsd = kzalloc(pntsd_alloc_size, GFP_KERNEL); if (!pntsd) { rc = -ENOMEM; goto free_aces_base; @@ -1136,6 +1138,27 @@ pass: pntsd->gsidoffset = parent_pntsd->gsidoffset; pntsd->dacloffset = parent_pntsd->dacloffset; + if ((u64)le32_to_cpu(pntsd->osidoffset) + powner_sid_size > + pntsd_alloc_size) { + rc = -EINVAL; + kfree(pntsd); + goto free_aces_base; + } + + if ((u64)le32_to_cpu(pntsd->gsidoffset) + pgroup_sid_size > + pntsd_alloc_size) { + rc = -EINVAL; + kfree(pntsd); + goto free_aces_base; + } + + if ((u64)le32_to_cpu(pntsd->dacloffset) + sizeof(struct smb_acl) + nt_size > + pntsd_alloc_size) { + rc = -EINVAL; + kfree(pntsd); + goto free_aces_base; + } + if (pntsd->osidoffset) { struct smb_sid *owner_sid = (struct smb_sid *)((char *)pntsd + le32_to_cpu(pntsd->osidoffset)); diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 1053127f71ad..c53dea5598fc 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -1177,9 +1177,10 @@ static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name, /** * ksmbd_vfs_kern_path_locked() - 
lookup a file and get path info - @name: file path that is relative to share - @flags: lookup flags - @path: if lookup succeed, return path info + @name: file path that is relative to share + @flags: lookup flags + @parent_path: if lookup succeeds, return parent_path info + @path: if lookup succeeds, return path info * @caseless: caseless filename lookup * * Return: 0 on success, otherwise error diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c index 723763746238..62972f0ff868 100644 --- a/fs/squashfs/export.c +++ b/fs/squashfs/export.c @@ -173,6 +173,7 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb, const struct export_operations squashfs_export_ops = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = squashfs_fh_to_dentry, .fh_to_parent = squashfs_fh_to_parent, .get_parent = squashfs_get_parent diff --git a/fs/super.c b/fs/super.c index 77faad662739..076392396e72 100644 --- a/fs/super.c +++ b/fs/super.c @@ -2160,3 +2160,4 @@ int sb_init_dio_done_wq(struct super_block *sb) destroy_workqueue(wq); return 0; } +EXPORT_SYMBOL_GPL(sb_init_dio_done_wq); diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 23377c1baed9..a480810cd4e3 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -137,6 +137,7 @@ static struct dentry *ufs_get_parent(struct dentry *child) } static const struct export_operations ufs_export_ops = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = ufs_fh_to_dentry, .fh_to_parent = ufs_fh_to_parent, .get_parent = ufs_get_parent, diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 30c931b38853..be62acffad6c 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -21,7 +21,7 @@ #include "xfs_bmap.h" #include "xfs_bmap_util.h" #include "xfs_bmap_btree.h" -#include "xfs_rtalloc.h" +#include "xfs_rtbitmap.h" #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_quota.h" @@ -2989,7 +2989,7 @@ xfs_bmap_extsize_align( * If realtime, and the result isn't a multiple of the realtime * extent size we need to remove blocks until it is. */ - if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) { + if (rt && (temp = xfs_extlen_to_rtxmod(mp, align_alen))) { /* * We're not covering the original request, or * we won't be able to once we fix the length. @@ -3016,7 +3016,7 @@ else { align_alen -= orig_off - align_off; align_off = orig_off; - align_alen -= align_alen % mp->m_sb.sb_rextsize; + align_alen -= xfs_extlen_to_rtxmod(mp, align_alen); } /* * Result doesn't cover the request, fail it.
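The xfs_extlen_to_rtxmod() conversions above replace open-coded modulo against sb_rextsize; the intent is simply to trim a candidate allocation down to a whole number of realtime extents. A toy version with an illustrative extent size (the real helper also uses the mount's cached rt geometry and power-of-two fast paths):

#include <stdio.h>
#include <stdint.h>

/* analogue of xfs_extlen_to_rtxmod(): blocks past the last whole extent */
static uint32_t extlen_to_rtxmod(uint32_t len, uint32_t rextsize)
{
	return len % rextsize;
}

int main(void)
{
	uint32_t rextsize = 16;			/* blocks per rt extent */
	uint32_t align_alen = 100;		/* candidate length */
	uint32_t temp = extlen_to_rtxmod(align_alen, rextsize);

	if (temp)
		align_alen -= temp;		/* trim 100 -> 96 blocks */
	printf("aligned length: %u blocks (%u extents)\n",
	       align_alen, align_alen / rextsize);
	return 0;
}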
@@ -4826,12 +4826,8 @@ xfs_bmap_del_extent_delay( ASSERT(got->br_startoff <= del->br_startoff); ASSERT(got_endoff >= del_endoff); - if (isrt) { - uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount); - - do_div(rtexts, mp->m_sb.sb_rextsize); - xfs_mod_frextents(mp, rtexts); - } + if (isrt) + xfs_mod_frextents(mp, xfs_rtb_to_rtx(mp, del->br_blockcount)); /* * Update the inode delalloc counter now and wait to update the @@ -5057,33 +5053,20 @@ xfs_bmap_del_extent_real( flags = XFS_ILOG_CORE; if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { - xfs_filblks_t len; - xfs_extlen_t mod; - - len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize, - &mod); - ASSERT(mod == 0); - if (!(bflags & XFS_BMAPI_REMAP)) { - xfs_fsblock_t bno; - - bno = div_u64_rem(del->br_startblock, - mp->m_sb.sb_rextsize, &mod); - ASSERT(mod == 0); - - error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); + error = xfs_rtfree_blocks(tp, del->br_startblock, + del->br_blockcount); if (error) goto done; } do_fx = 0; - nblks = len * mp->m_sb.sb_rextsize; qfield = XFS_TRANS_DQ_RTBCOUNT; } else { do_fx = 1; - nblks = del->br_blockcount; qfield = XFS_TRANS_DQ_BCOUNT; } + nblks = del->br_blockcount; del_endblock = del->br_startblock + del->br_blockcount; if (cur) { @@ -5289,7 +5272,6 @@ __xfs_bunmapi( int tmp_logflags; /* partial logging flags */ int wasdel; /* was a delayed alloc extent */ int whichfork; /* data or attribute fork */ - xfs_fsblock_t sum; xfs_filblks_t len = *rlen; /* length to unmap in file */ xfs_fileoff_t end; struct xfs_iext_cursor icur; @@ -5384,8 +5366,8 @@ __xfs_bunmapi( if (!isrt) goto delete; - sum = del.br_startblock + del.br_blockcount; - div_u64_rem(sum, mp->m_sb.sb_rextsize, &mod); + mod = xfs_rtb_to_rtxoff(mp, + del.br_startblock + del.br_blockcount); if (mod) { /* * Realtime extent not lined up at the end. @@ -5432,7 +5414,8 @@ __xfs_bunmapi( goto error0; goto nodelete; } - div_u64_rem(del.br_startblock, mp->m_sb.sb_rextsize, &mod); + + mod = xfs_rtb_to_rtxoff(mp, del.br_startblock); if (mod) { xfs_extlen_t off = mp->m_sb.sb_rextsize - mod; @@ -6209,8 +6192,8 @@ xfs_bmap_validate_extent( return __this_address; if (XFS_IS_REALTIME_INODE(ip) && whichfork == XFS_DATA_FORK) { - if (!xfs_verify_rtext(mp, irec->br_startblock, - irec->br_blockcount)) + if (!xfs_verify_rtbext(mp, irec->br_startblock, + irec->br_blockcount)) return __this_address; } else { if (!xfs_verify_fsbext(mp, irec->br_startblock, diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 371dc07233e0..9a88aba1589f 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -98,7 +98,7 @@ typedef struct xfs_sb { uint32_t sb_blocksize; /* logical block size, bytes */ xfs_rfsblock_t sb_dblocks; /* number of data blocks */ xfs_rfsblock_t sb_rblocks; /* number of realtime blocks */ - xfs_rtblock_t sb_rextents; /* number of realtime extents */ + xfs_rtbxlen_t sb_rextents; /* number of realtime extents */ uuid_t sb_uuid; /* user-visible file system unique id */ xfs_fsblock_t sb_logstart; /* starting block of log if internal */ xfs_ino_t sb_rootino; /* root inode number */ @@ -691,6 +691,22 @@ struct xfs_agfl { xfs_daddr_to_agno(mp, (d) + (len) - 1))) /* + * Realtime bitmap information is accessed by the word, which is currently + * stored in host-endian format. + */ +union xfs_rtword_raw { + __u32 old; +}; + +/* + * Realtime summary counts are accessed by the word, which is currently + * stored in host-endian format. 
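The new xfs_rtword_raw/xfs_suminfo_raw unions look odd with a single member, but they force every access through an accessor, so an endian-converted (or wider) on-disk format can be added later without touching callers. A minimal standalone analogue of the accessor pattern; the union and accessor names follow the kernel's, the surrounding types do not:

#include <stdint.h>
#include <stdio.h>

union rtword_raw {
	uint32_t old;		/* current host-endian format */
};

static uint32_t rtbitmap_getword(const union rtword_raw *words,
				 unsigned int index)
{
	return words[index].old;	/* future formats decode here */
}

int main(void)
{
	union rtword_raw buf[2] = { { 0xFFFF0000u }, { 0x0000000Fu } };

	printf("word 0: 0x%08x\n", rtbitmap_getword(buf, 0));
	printf("word 1: 0x%08x\n", rtbitmap_getword(buf, 1));
	return 0;
}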
+ */ +union xfs_suminfo_raw { + __u32 old; +}; + +/* * XFS Timestamps * ============== * @@ -1142,24 +1158,10 @@ static inline bool xfs_dinode_has_large_extent_counts( #define XFS_BLOCKSIZE(mp) ((mp)->m_sb.sb_blocksize) #define XFS_BLOCKMASK(mp) ((mp)->m_blockmask) -#define XFS_BLOCKWSIZE(mp) ((mp)->m_blockwsize) -#define XFS_BLOCKWMASK(mp) ((mp)->m_blockwmask) /* - * RT Summary and bit manipulation macros. + * RT bit manipulation macros. */ -#define XFS_SUMOFFS(mp,ls,bb) ((int)((ls) * (mp)->m_sb.sb_rbmblocks + (bb))) -#define XFS_SUMOFFSTOBLOCK(mp,s) \ - (((s) * (uint)sizeof(xfs_suminfo_t)) >> (mp)->m_sb.sb_blocklog) -#define XFS_SUMPTR(mp,bp,so) \ - ((xfs_suminfo_t *)((bp)->b_addr + \ - (((so) * (uint)sizeof(xfs_suminfo_t)) & XFS_BLOCKMASK(mp)))) - -#define XFS_BITTOBLOCK(mp,bi) ((bi) >> (mp)->m_blkbit_log) -#define XFS_BLOCKTOBIT(mp,bb) ((bb) << (mp)->m_blkbit_log) -#define XFS_BITTOWORD(mp,bi) \ - ((int)(((bi) >> XFS_NBWORDLOG) & XFS_BLOCKWMASK(mp))) - #define XFS_RTMIN(a,b) ((a) < (b) ? (a) : (b)) #define XFS_RTMAX(a,b) ((a) > (b) ? (a) : (b)) diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 396648acb5be..c269d704314d 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -16,6 +16,7 @@ #include "xfs_trans.h" #include "xfs_rtalloc.h" #include "xfs_error.h" +#include "xfs_rtbitmap.h" /* * Realtime allocator bitmap functions shared with userspace. @@ -46,25 +47,69 @@ const struct xfs_buf_ops xfs_rtbuf_ops = { .verify_write = xfs_rtbuf_verify_write, }; +/* Release cached rt bitmap and summary buffers. */ +void +xfs_rtbuf_cache_relse( + struct xfs_rtalloc_args *args) +{ + if (args->rbmbp) { + xfs_trans_brelse(args->tp, args->rbmbp); + args->rbmbp = NULL; + args->rbmoff = NULLFILEOFF; + } + if (args->sumbp) { + xfs_trans_brelse(args->tp, args->sumbp); + args->sumbp = NULL; + args->sumoff = NULLFILEOFF; + } +} + /* * Get a buffer for the bitmap or summary file block specified. * The buffer is returned read and locked. */ int xfs_rtbuf_get( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t block, /* block number in bitmap or summary */ - int issum, /* is summary not bitmap */ - struct xfs_buf **bpp) /* output: buffer for the block */ + struct xfs_rtalloc_args *args, + xfs_fileoff_t block, /* block number in bitmap or summary */ + int issum) /* is summary not bitmap */ { - struct xfs_buf *bp; /* block buffer, result */ - xfs_inode_t *ip; /* bitmap or summary inode */ - xfs_bmbt_irec_t map; - int nmap = 1; - int error; /* error value */ + struct xfs_mount *mp = args->mp; + struct xfs_buf **cbpp; /* cached block buffer */ + xfs_fileoff_t *coffp; /* cached block number */ + struct xfs_buf *bp; /* block buffer, result */ + struct xfs_inode *ip; /* bitmap or summary inode */ + struct xfs_bmbt_irec map; + enum xfs_blft type; + int nmap = 1; + int error; - ip = issum ? mp->m_rsumip : mp->m_rbmip; + if (issum) { + cbpp = &args->sumbp; + coffp = &args->sumoff; + ip = mp->m_rsumip; + type = XFS_BLFT_RTSUMMARY_BUF; + } else { + cbpp = &args->rbmbp; + coffp = &args->rbmoff; + ip = mp->m_rbmip; + type = XFS_BLFT_RTBITMAP_BUF; + } + + /* + * If we have a cached buffer, and the block number matches, use that. + */ + if (*cbpp && *coffp == block) + return 0; + + /* + * Otherwise we have to get the buffer. If there was an old + * one, get rid of it first.
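Aside: xfs_rtbuf_get() now keeps one cached buffer per file (bitmap and summary) in struct xfs_rtalloc_args, reusing it while the caller walks adjacent blocks. A minimal userspace model of that one-slot cache, assuming stand-in read_block()/release_block() primitives and a fake in-memory disk; none of these names are kernel API.

#include <stdint.h>
#include <stdio.h>

#define NULLOFF ((uint64_t)-1)

static char blocks[4][16];	/* fake disk: four blocks */
static int reads;

static void *read_block(uint64_t block) { reads++; return blocks[block]; }
static void release_block(void *bp) { (void)bp; }

struct cached_buf {
	void		*bp;	/* held buffer, or NULL */
	uint64_t	off;	/* block number it maps, or NULLOFF */
};

static void *cache_get(struct cached_buf *c, uint64_t block)
{
	if (c->bp && c->off == block)
		return c->bp;			/* hit: no release, no re-read */
	if (c->bp)
		release_block(c->bp);		/* miss: drop the stale buffer */
	c->bp = read_block(block);
	c->off = block;
	return c->bp;
}

int main(void)
{
	struct cached_buf c = { NULL, NULLOFF };

	cache_get(&c, 2);
	cache_get(&c, 2);	/* served from the cache */
	cache_get(&c, 3);	/* forces a release and a re-read */
	printf("reads = %d\n", reads);	/* prints 2 */
	return 0;
}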
+ */ + if (*cbpp) { + xfs_trans_brelse(args->tp, *cbpp); + *cbpp = NULL; + } error = xfs_bmapi_read(ip, block, 1, &map, &nmap, 0); if (error) @@ -74,15 +119,15 @@ xfs_rtbuf_get( return -EFSCORRUPTED; ASSERT(map.br_startblock != NULLFSBLOCK); - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, + error = xfs_trans_read_buf(mp, args->tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, map.br_startblock), mp->m_bsize, 0, &bp, &xfs_rtbuf_ops); if (error) return error; - xfs_trans_buf_set_type(tp, bp, issum ? XFS_BLFT_RTSUMMARY_BUF - : XFS_BLFT_RTBITMAP_BUF); - *bpp = bp; + xfs_trans_buf_set_type(args->tp, bp, type); + *cbpp = bp; + *coffp = block; return 0; } @@ -92,47 +137,44 @@ xfs_rtbuf_get( */ int xfs_rtfind_back( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block to look at */ - xfs_rtblock_t limit, /* last block to look at */ - xfs_rtblock_t *rtblock) /* out: start block found */ + struct xfs_rtalloc_args *args, + xfs_rtxnum_t start, /* starting rtext to look at */ + xfs_rtxnum_t limit, /* last rtext to look at */ + xfs_rtxnum_t *rtx) /* out: start rtext found */ { - xfs_rtword_t *b; /* current word in buffer */ - int bit; /* bit number in the word */ - xfs_rtblock_t block; /* bitmap block number */ - struct xfs_buf *bp; /* buf for the block */ - xfs_rtword_t *bufp; /* starting word in buffer */ - int error; /* error value */ - xfs_rtblock_t firstbit; /* first useful bit in the word */ - xfs_rtblock_t i; /* current bit number rel. to start */ - xfs_rtblock_t len; /* length of inspected area */ - xfs_rtword_t mask; /* mask of relevant bits for value */ - xfs_rtword_t want; /* mask for "good" values */ - xfs_rtword_t wdiff; /* difference from wanted value */ - int word; /* word number in the buffer */ + struct xfs_mount *mp = args->mp; + int bit; /* bit number in the word */ + xfs_fileoff_t block; /* bitmap block number */ + int error; /* error value */ + xfs_rtxnum_t firstbit; /* first useful bit in the word */ + xfs_rtxnum_t i; /* current bit number rel. to start */ + xfs_rtxnum_t len; /* length of inspected area */ + xfs_rtword_t mask; /* mask of relevant bits for value */ + xfs_rtword_t want; /* mask for "good" values */ + xfs_rtword_t wdiff; /* difference from wanted value */ + xfs_rtword_t incore; + unsigned int word; /* word number in the buffer */ /* * Compute and read in starting bitmap block for starting block. */ - block = XFS_BITTOBLOCK(mp, start); - error = xfs_rtbuf_get(mp, tp, block, 0, &bp); - if (error) { + block = xfs_rtx_to_rbmblock(mp, start); + error = xfs_rtbitmap_read_buf(args, block); + if (error) return error; - } - bufp = bp->b_addr; + /* * Get the first word's index & point to it. */ - word = XFS_BITTOWORD(mp, start); - b = &bufp[word]; + word = xfs_rtx_to_rbmword(mp, start); bit = (int)(start & (XFS_NBWORD - 1)); len = start - limit + 1; /* * Compute match value, based on the bit at start: if 1 (free) * then all-ones, else all-zeroes. */ - want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; + incore = xfs_rtbitmap_getword(args, word); + want = (incore & ((xfs_rtword_t)1 << bit)) ? -1 : 0; /* * If the starting position is not word-aligned, deal with the * partial word. @@ -149,13 +191,12 @@ xfs_rtfind_back( * Calculate the difference between the value there * and what we're looking for. */ - if ((wdiff = (*b ^ want) & mask)) { + if ((wdiff = (incore ^ want) & mask)) { /* * Different. Mark where we are and return. 
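Aside: the search above hinges on one trick: XOR each bitmap word against a "want" pattern (all ones when the starting bit is free, all zeroes otherwise), so any set bit in the result marks where the run of identical bits ends; XFS_RTHIBIT picks the highest such bit when scanning backward. A self-contained sketch of that computation confined to a single 32-bit word, with hibit() standing in for XFS_RTHIBIT:

#include <assert.h>
#include <stdint.h>

static int hibit(uint32_t w)		/* highest set bit, 0..31 */
{
	return 31 - __builtin_clz(w);
}

/* Length of the run of identical bits ending at "bit" (inclusive). */
static int run_back(uint32_t word, int bit)
{
	uint32_t want = (word & (1u << bit)) ? ~0u : 0u;
	uint32_t mask = (bit == 31) ? ~0u : (1u << (bit + 1)) - 1;
	uint32_t wdiff = (word ^ want) & mask;

	if (!wdiff)
		return bit + 1;		/* run covers bits 0..bit */
	return bit - hibit(wdiff);
}

int main(void)
{
	/* bits 4..6 set in 0x70: run of ones ending at bit 6 has length 3 */
	assert(run_back(0x70, 6) == 3);
	/* bit 7 clear, bits 4..6 set: run of zeroes at bit 7 has length 1 */
	assert(run_back(0x70, 7) == 1);
	return 0;
}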
*/ - xfs_trans_brelse(tp, bp); i = bit - XFS_RTHIBIT(wdiff); - *rtblock = start - i + 1; + *rtx = start - i + 1; return 0; } i = bit - firstbit + 1; @@ -167,19 +208,11 @@ xfs_rtfind_back( /* * If done with this block, get the previous one. */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); - if (error) { + error = xfs_rtbitmap_read_buf(args, --block); + if (error) return error; - } - bufp = bp->b_addr; - word = XFS_BLOCKWMASK(mp); - b = &bufp[word]; - } else { - /* - * Go on to the previous word in the buffer. - */ - b--; + + word = mp->m_blockwsize - 1; } } else { /* @@ -195,13 +228,13 @@ xfs_rtfind_back( /* * Compute difference between actual and desired value. */ - if ((wdiff = *b ^ want)) { + incore = xfs_rtbitmap_getword(args, word); + if ((wdiff = incore ^ want)) { /* * Different, mark where we are and return. */ - xfs_trans_brelse(tp, bp); i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); - *rtblock = start - i + 1; + *rtx = start - i + 1; return 0; } i += XFS_NBWORD; @@ -213,19 +246,11 @@ xfs_rtfind_back( /* * If done with this block, get the previous one. */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); - if (error) { + error = xfs_rtbitmap_read_buf(args, --block); + if (error) return error; - } - bufp = bp->b_addr; - word = XFS_BLOCKWMASK(mp); - b = &bufp[word]; - } else { - /* - * Go on to the previous word in the buffer. - */ - b--; + + word = mp->m_blockwsize - 1; } } /* @@ -242,13 +267,13 @@ xfs_rtfind_back( /* * Compute difference between actual and desired value. */ - if ((wdiff = (*b ^ want) & mask)) { + incore = xfs_rtbitmap_getword(args, word); + if ((wdiff = (incore ^ want) & mask)) { /* * Different, mark where we are and return. */ - xfs_trans_brelse(tp, bp); i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); - *rtblock = start - i + 1; + *rtx = start - i + 1; return 0; } else i = len; @@ -256,8 +281,7 @@ xfs_rtfind_back( /* * No match, return that we scanned the whole area. */ - xfs_trans_brelse(tp, bp); - *rtblock = start - i + 1; + *rtx = start - i + 1; return 0; } @@ -267,47 +291,44 @@ xfs_rtfind_back( */ int xfs_rtfind_forw( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block to look at */ - xfs_rtblock_t limit, /* last block to look at */ - xfs_rtblock_t *rtblock) /* out: start block found */ + struct xfs_rtalloc_args *args, + xfs_rtxnum_t start, /* starting rtext to look at */ + xfs_rtxnum_t limit, /* last rtext to look at */ + xfs_rtxnum_t *rtx) /* out: start rtext found */ { - xfs_rtword_t *b; /* current word in buffer */ - int bit; /* bit number in the word */ - xfs_rtblock_t block; /* bitmap block number */ - struct xfs_buf *bp; /* buf for the block */ - xfs_rtword_t *bufp; /* starting word in buffer */ - int error; /* error value */ - xfs_rtblock_t i; /* current bit number rel. to start */ - xfs_rtblock_t lastbit; /* last useful bit in the word */ - xfs_rtblock_t len; /* length of inspected area */ - xfs_rtword_t mask; /* mask of relevant bits for value */ - xfs_rtword_t want; /* mask for "good" values */ - xfs_rtword_t wdiff; /* difference from wanted value */ - int word; /* word number in the buffer */ + struct xfs_mount *mp = args->mp; + int bit; /* bit number in the word */ + xfs_fileoff_t block; /* bitmap block number */ + int error; + xfs_rtxnum_t i; /* current bit number rel. 
to start */ + xfs_rtxnum_t lastbit;/* last useful bit in the word */ + xfs_rtxnum_t len; /* length of inspected area */ + xfs_rtword_t mask; /* mask of relevant bits for value */ + xfs_rtword_t want; /* mask for "good" values */ + xfs_rtword_t wdiff; /* difference from wanted value */ + xfs_rtword_t incore; + unsigned int word; /* word number in the buffer */ /* * Compute and read in starting bitmap block for starting block. */ - block = XFS_BITTOBLOCK(mp, start); - error = xfs_rtbuf_get(mp, tp, block, 0, &bp); - if (error) { + block = xfs_rtx_to_rbmblock(mp, start); + error = xfs_rtbitmap_read_buf(args, block); + if (error) return error; - } - bufp = bp->b_addr; + /* * Get the first word's index & point to it. */ - word = XFS_BITTOWORD(mp, start); - b = &bufp[word]; + word = xfs_rtx_to_rbmword(mp, start); bit = (int)(start & (XFS_NBWORD - 1)); len = limit - start + 1; /* * Compute match value, based on the bit at start: if 1 (free) * then all-ones, else all-zeroes. */ - want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; + incore = xfs_rtbitmap_getword(args, word); + want = (incore & ((xfs_rtword_t)1 << bit)) ? -1 : 0; /* * If the starting position is not word-aligned, deal with the * partial word. @@ -323,13 +344,12 @@ xfs_rtfind_forw( * Calculate the difference between the value there * and what we're looking for. */ - if ((wdiff = (*b ^ want) & mask)) { + if ((wdiff = (incore ^ want) & mask)) { /* * Different. Mark where we are and return. */ - xfs_trans_brelse(tp, bp); i = XFS_RTLOBIT(wdiff) - bit; - *rtblock = start + i - 1; + *rtx = start + i - 1; return 0; } i = lastbit - bit; /* * Go on to next block if that's where the next word is * and we need the next word. */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + if (++word == mp->m_blockwsize && i < len) { /* * If done with this block, get the next one. */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { + error = xfs_rtbitmap_read_buf(args, ++block); + if (error) return error; - } - b = bufp = bp->b_addr; + word = 0; - } else { - /* - * Go on to the next word in the buffer. - */ - b++; } } else { /* @@ -368,13 +381,13 @@ xfs_rtfind_forw( /* * Compute difference between actual and desired value. */ - if ((wdiff = *b ^ want)) { + incore = xfs_rtbitmap_getword(args, word); + if ((wdiff = incore ^ want)) { /* * Different, mark where we are and return. */ - xfs_trans_brelse(tp, bp); i += XFS_RTLOBIT(wdiff); - *rtblock = start + i - 1; + *rtx = start + i - 1; return 0; } i += XFS_NBWORD; /* * Go on to next block if that's where the next word is * and we need the next word. */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + if (++word == mp->m_blockwsize && i < len) { /* * If done with this block, get the next one. */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { + error = xfs_rtbitmap_read_buf(args, ++block); + if (error) return error; - } - b = bufp = bp->b_addr; + word = 0; - } else { - /* - * Go on to the next word in the buffer. - */ - b++; } } /* @@ -412,13 +418,13 @@ xfs_rtfind_forw( /* * Compute difference between actual and desired value. */ - if ((wdiff = (*b ^ want) & mask)) { + incore = xfs_rtbitmap_getword(args, word); + if ((wdiff = (incore ^ want) & mask)) { /* * Different, mark where we are and return.
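Aside: the forward scan is symmetric to the backward one: it masks off bits below the starting position and uses the lowest set bit of the XOR difference (XFS_RTLOBIT) to bound the run. A single-word sketch, with lobit() standing in for XFS_RTLOBIT; all names are local to the example.

#include <assert.h>
#include <stdint.h>

static int lobit(uint32_t w)		/* lowest set bit, 0..31 */
{
	return __builtin_ctz(w);
}

/* Length of the run of identical bits starting at "bit" (inclusive). */
static int run_forw(uint32_t word, int bit)
{
	uint32_t want = (word & (1u << bit)) ? ~0u : 0u;
	uint32_t wdiff = (word ^ want) & ~((1u << bit) - 1);

	if (!wdiff)
		return 32 - bit;	/* run reaches the end of the word */
	return lobit(wdiff) - bit;
}

int main(void)
{
	/* bits 4..6 set in 0x70: run of ones from bit 4 has length 3 */
	assert(run_forw(0x70, 4) == 3);
	/* bits 0..3 clear: run of zeroes from bit 0 has length 4 */
	assert(run_forw(0x70, 0) == 4);
	return 0;
}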
*/ - xfs_trans_brelse(tp, bp); i += XFS_RTLOBIT(wdiff); - *rtblock = start + i - 1; + *rtx = start + i - 1; return 0; } else i = len; @@ -426,11 +432,25 @@ xfs_rtfind_forw( /* * No match, return that we scanned the whole area. */ - xfs_trans_brelse(tp, bp); - *rtblock = start + i - 1; + *rtx = start + i - 1; return 0; } +/* Log rtsummary counter at @infoword. */ +static inline void +xfs_trans_log_rtsummary( + struct xfs_rtalloc_args *args, + unsigned int infoword) +{ + struct xfs_buf *bp = args->sumbp; + size_t first, last; + + first = (void *)xfs_rsumblock_infoptr(args, infoword) - bp->b_addr; + last = first + sizeof(xfs_suminfo_t) - 1; + + xfs_trans_log_buf(args->tp, bp, first, last); +} + /* * Read and/or modify the summary information for a given extent size, * bitmap block combination. @@ -442,86 +462,77 @@ xfs_rtfind_forw( */ int xfs_rtmodify_summary_int( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - int log, /* log2 of extent size */ - xfs_rtblock_t bbno, /* bitmap block number */ - int delta, /* change to make to summary info */ - struct xfs_buf **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb, /* in/out: summary block number */ - xfs_suminfo_t *sum) /* out: summary info for this block */ + struct xfs_rtalloc_args *args, + int log, /* log2 of extent size */ + xfs_fileoff_t bbno, /* bitmap block number */ + int delta, /* change to make to summary info */ + xfs_suminfo_t *sum) /* out: summary info for this block */ { - struct xfs_buf *bp; /* buffer for the summary block */ - int error; /* error value */ - xfs_fsblock_t sb; /* summary fsblock */ - int so; /* index into the summary file */ - xfs_suminfo_t *sp; /* pointer to returned data */ + struct xfs_mount *mp = args->mp; + int error; + xfs_fileoff_t sb; /* summary fsblock */ + xfs_rtsumoff_t so; /* index into the summary file */ + unsigned int infoword; /* * Compute entry number in the summary file. */ - so = XFS_SUMOFFS(mp, log, bbno); + so = xfs_rtsumoffs(mp, log, bbno); /* * Compute the block number in the summary file. */ - sb = XFS_SUMOFFSTOBLOCK(mp, so); - /* - * If we have an old buffer, and the block number matches, use that. - */ - if (*rbpp && *rsb == sb) - bp = *rbpp; - /* - * Otherwise we have to get the buffer. - */ - else { - /* - * If there was an old one, get rid of it first. - */ - if (*rbpp) - xfs_trans_brelse(tp, *rbpp); - error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); - if (error) { - return error; - } - /* - * Remember this buffer and block for the next call. - */ - *rbpp = bp; - *rsb = sb; - } + sb = xfs_rtsumoffs_to_block(mp, so); + + error = xfs_rtsummary_read_buf(args, sb); + if (error) + return error; + /* * Point to the summary information, modify/log it, and/or copy it out. 
*/ - sp = XFS_SUMPTR(mp, bp, so); + infoword = xfs_rtsumoffs_to_infoword(mp, so); if (delta) { - uint first = (uint)((char *)sp - (char *)bp->b_addr); + xfs_suminfo_t val = xfs_suminfo_add(args, infoword, delta); - *sp += delta; if (mp->m_rsum_cache) { - if (*sp == 0 && log == mp->m_rsum_cache[bbno]) - mp->m_rsum_cache[bbno]++; - if (*sp != 0 && log < mp->m_rsum_cache[bbno]) + if (val == 0 && log + 1 == mp->m_rsum_cache[bbno]) mp->m_rsum_cache[bbno] = log; + if (val != 0 && log >= mp->m_rsum_cache[bbno]) + mp->m_rsum_cache[bbno] = log + 1; } - xfs_trans_log_buf(tp, bp, first, first + sizeof(*sp) - 1); + xfs_trans_log_rtsummary(args, infoword); + if (sum) + *sum = val; + } else if (sum) { + *sum = xfs_suminfo_get(args, infoword); } - if (sum) - *sum = *sp; return 0; } int xfs_rtmodify_summary( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - int log, /* log2 of extent size */ - xfs_rtblock_t bbno, /* bitmap block number */ - int delta, /* change to make to summary info */ - struct xfs_buf **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb) /* in/out: summary block number */ + struct xfs_rtalloc_args *args, + int log, /* log2 of extent size */ + xfs_fileoff_t bbno, /* bitmap block number */ + int delta) /* change to make to summary info */ +{ + return xfs_rtmodify_summary_int(args, log, bbno, delta, NULL); +} + +/* Log rtbitmap block from the word @from to the byte before @next. */ +static inline void +xfs_trans_log_rtbitmap( + struct xfs_rtalloc_args *args, + unsigned int from, + unsigned int next) { - return xfs_rtmodify_summary_int(mp, tp, log, bbno, - delta, rbpp, rsb, NULL); + struct xfs_buf *bp = args->rbmbp; + size_t first, last; + + first = (void *)xfs_rbmblock_wordptr(args, from) - bp->b_addr; + last = ((void *)xfs_rbmblock_wordptr(args, next) - 1) - bp->b_addr; + + xfs_trans_log_buf(args->tp, bp, first, last); } /* @@ -530,41 +541,37 @@ xfs_rtmodify_summary( */ int xfs_rtmodify_range( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block to modify */ - xfs_extlen_t len, /* length of extent to modify */ - int val) /* 1 for free, 0 for allocated */ + struct xfs_rtalloc_args *args, + xfs_rtxnum_t start, /* starting rtext to modify */ + xfs_rtxlen_t len, /* length of extent to modify */ + int val) /* 1 for free, 0 for allocated */ { - xfs_rtword_t *b; /* current word in buffer */ - int bit; /* bit number in the word */ - xfs_rtblock_t block; /* bitmap block number */ - struct xfs_buf *bp; /* buf for the block */ - xfs_rtword_t *bufp; /* starting word in buffer */ - int error; /* error value */ - xfs_rtword_t *first; /* first used word in the buffer */ - int i; /* current bit number rel. to start */ - int lastbit; /* last useful bit in word */ - xfs_rtword_t mask; /* mask o frelevant bits for value */ - int word; /* word number in the buffer */ + struct xfs_mount *mp = args->mp; + int bit; /* bit number in the word */ + xfs_fileoff_t block; /* bitmap block number */ + int error; + int i; /* current bit number rel. to start */ + int lastbit; /* last useful bit in word */ + xfs_rtword_t mask; /* mask of relevant bits for value */ + xfs_rtword_t incore; + unsigned int firstword; /* first word used in the buffer */ + unsigned int word; /* word number in the buffer */ /* * Compute starting bitmap block number. */ - block = XFS_BITTOBLOCK(mp, start); + block = xfs_rtx_to_rbmblock(mp, start); /* * Read the bitmap block, and point to its data.
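Aside: the summary file is conceptually a two-dimensional array indexed by log2(extent length) and bitmap block number, flattened into 32-bit words. A standalone sketch of the offset math now wrapped by xfs_rtsumoffs(), xfs_rtsumoffs_to_block() and xfs_rtsumoffs_to_infoword(); the geometry values here are invented for the example.

#include <assert.h>
#include <stdint.h>

#define SUMINFO_SIZE	4u	/* bytes per summary counter */

struct geom {
	uint32_t	rbmblocks;	/* blocks in the rt bitmap file */
	uint32_t	blocklog;	/* log2 of the fs block size */
};

/* Flat word index of summary[log2len][bbno]. */
static uint32_t sumoffs(const struct geom *g, int log2len, uint32_t bbno)
{
	return log2len * g->rbmblocks + bbno;
}

/* Which block of the summary file holds that word. */
static uint32_t sumoffs_to_block(const struct geom *g, uint32_t so)
{
	return (so * SUMINFO_SIZE) >> g->blocklog;
}

/* Word index within that block. */
static uint32_t sumoffs_to_infoword(const struct geom *g, uint32_t so)
{
	uint32_t words_per_block = (1u << g->blocklog) / SUMINFO_SIZE;

	return so & (words_per_block - 1);
}

int main(void)
{
	struct geom g = { .rbmblocks = 100, .blocklog = 12 };	/* 4k blocks */
	uint32_t so = sumoffs(&g, 11, 42);	/* 11 * 100 + 42 */

	assert(so == 1142);
	assert(sumoffs_to_block(&g, so) == 1);		/* 4568 / 4096 */
	assert(sumoffs_to_infoword(&g, so) == 118);	/* 1142 % 1024 */
	return 0;
}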
*/ - error = xfs_rtbuf_get(mp, tp, block, 0, &bp); - if (error) { + error = xfs_rtbitmap_read_buf(args, block); + if (error) return error; - } - bufp = bp->b_addr; + /* * Compute the starting word's address, and starting bit. */ - word = XFS_BITTOWORD(mp, start); - first = b = &bufp[word]; + firstword = word = xfs_rtx_to_rbmword(mp, start); bit = (int)(start & (XFS_NBWORD - 1)); /* * 0 (allocated) => all zeroes; 1 (free) => all ones. @@ -583,34 +590,28 @@ xfs_rtmodify_range( /* * Set/clear the active bits. */ + incore = xfs_rtbitmap_getword(args, word); if (val) - *b |= mask; + incore |= mask; else - *b &= ~mask; + incore &= ~mask; + xfs_rtbitmap_setword(args, word, incore); i = lastbit - bit; /* * Go on to the next block if that's where the next word is * and we need the next word. */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + if (++word == mp->m_blockwsize && i < len) { /* * Log the changed part of this block. * Get the next one. */ - xfs_trans_log_buf(tp, bp, - (uint)((char *)first - (char *)bufp), - (uint)((char *)b - (char *)bufp)); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { + xfs_trans_log_rtbitmap(args, firstword, word); + error = xfs_rtbitmap_read_buf(args, ++block); + if (error) return error; - } - first = b = bufp = bp->b_addr; - word = 0; - } else { - /* - * Go on to the next word in the buffer - */ - b++; + + firstword = word = 0; } } else { /* @@ -626,31 +627,23 @@ xfs_rtmodify_range( /* * Set the word value correctly. */ - *b = val; + xfs_rtbitmap_setword(args, word, val); i += XFS_NBWORD; /* * Go on to the next block if that's where the next word is * and we need the next word. */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + if (++word == mp->m_blockwsize && i < len) { /* * Log the changed part of this block. * Get the next one. */ - xfs_trans_log_buf(tp, bp, - (uint)((char *)first - (char *)bufp), - (uint)((char *)b - (char *)bufp)); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { + xfs_trans_log_rtbitmap(args, firstword, word); + error = xfs_rtbitmap_read_buf(args, ++block); + if (error) return error; - } - first = b = bufp = bp->b_addr; - word = 0; - } else { - /* - * Go on to the next word in the buffer - */ - b++; + + firstword = word = 0; } } /* @@ -665,18 +658,19 @@ xfs_rtmodify_range( /* * Set/clear the active bits. */ + incore = xfs_rtbitmap_getword(args, word); if (val) - *b |= mask; + incore |= mask; else - *b &= ~mask; - b++; + incore &= ~mask; + xfs_rtbitmap_setword(args, word, incore); + word++; } /* * Log any remaining changed bytes. 
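Aside: xfs_rtmodify_range() sets or clears an arbitrary run of bits by handling a leading partial word, then whole words, then a trailing partial word. A compact userspace model of the same masking, operating on an in-memory word array instead of transaction buffers; the function name is local to this sketch.

#include <assert.h>
#include <stdint.h>

#define NBWORD 32

static void modify_range(uint32_t *map, int start, int len, int val)
{
	int word = start / NBWORD;
	int bit = start % NBWORD;

	while (len > 0) {
		int lastbit = bit + len < NBWORD ? bit + len : NBWORD;
		uint32_t mask = (lastbit - bit == NBWORD) ? ~0u :
				(((1u << (lastbit - bit)) - 1) << bit);

		if (val)
			map[word] |= mask;	/* mark free */
		else
			map[word] &= ~mask;	/* mark allocated */
		len -= lastbit - bit;
		bit = 0;
		word++;
	}
}

int main(void)
{
	uint32_t map[2] = { 0, 0 };

	modify_range(map, 30, 4, 1);	/* run spans the word boundary */
	assert(map[0] == 0xc0000000 && map[1] == 0x3);
	return 0;
}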
*/ - if (b > first) - xfs_trans_log_buf(tp, bp, (uint)((char *)first - (char *)bufp), - (uint)((char *)b - (char *)bufp - 1)); + if (word > firstword) + xfs_trans_log_rtbitmap(args, firstword, word); return 0; } @@ -686,23 +680,21 @@ xfs_rtmodify_range( */ int xfs_rtfree_range( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block to free */ - xfs_extlen_t len, /* length to free */ - struct xfs_buf **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb) /* in/out: summary block number */ + struct xfs_rtalloc_args *args, + xfs_rtxnum_t start, /* starting rtext to free */ + xfs_rtxlen_t len) /* length to free */ { - xfs_rtblock_t end; /* end of the freed extent */ - int error; /* error value */ - xfs_rtblock_t postblock; /* first block freed > end */ - xfs_rtblock_t preblock; /* first block freed < start */ + struct xfs_mount *mp = args->mp; + xfs_rtxnum_t end; /* end of the freed extent */ + int error; /* error value */ + xfs_rtxnum_t postblock; /* first rtext freed > end */ + xfs_rtxnum_t preblock; /* first rtext freed < start */ end = start + len - 1; /* * Modify the bitmap to mark this extent freed. */ - error = xfs_rtmodify_range(mp, tp, start, len, 1); + error = xfs_rtmodify_range(args, start, len, 1); if (error) { return error; } @@ -711,15 +703,15 @@ xfs_rtfree_range( * We need to find the beginning and end of the extent so we can * properly update the summary. */ - error = xfs_rtfind_back(mp, tp, start, 0, &preblock); + error = xfs_rtfind_back(args, start, 0, &preblock); if (error) { return error; } /* * Find the next allocated block (end of allocated extent). */ - error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, - &postblock); + error = xfs_rtfind_forw(args, end, mp->m_sb.sb_rextents - 1, + &postblock); if (error) return error; /* @@ -727,9 +719,9 @@ xfs_rtfree_range( * old extent, add summary data for them to be allocated. */ if (preblock < start) { - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(start - preblock), - XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb); + error = xfs_rtmodify_summary(args, + XFS_RTBLOCKLOG(start - preblock), + xfs_rtx_to_rbmblock(mp, preblock), -1); if (error) { return error; } } /* * If there are blocks not being freed at the end of the * old extent, add summary data for them to be allocated. */ if (postblock > end) { - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(postblock - end), - XFS_BITTOBLOCK(mp, end + 1), -1, rbpp, rsb); + error = xfs_rtmodify_summary(args, + XFS_RTBLOCKLOG(postblock - end), + xfs_rtx_to_rbmblock(mp, end + 1), -1); if (error) { return error; } } /* * Increment the summary information corresponding to the entire * (new) free extent.
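Aside: the summary bookkeeping in xfs_rtfree_range() is easiest to see with numbers: freeing an extent between two free neighbours removes the neighbours' summary entries (delta -1) and adds a single entry for the merged run (delta +1). A toy model of that accounting, with sum[] as a histogram over log2(run length in rt extents); the lengths are made up for the example.

#include <assert.h>
#include <stdint.h>

static int log2_len(uint64_t len)
{
	return 63 - __builtin_clzll(len);
}

int main(void)
{
	int sum[64] = { 0 };
	uint64_t pre = 4, freed = 8, post = 4;	/* run lengths in rtexts */

	sum[log2_len(pre)]++;			/* existing left neighbour */
	sum[log2_len(post)]++;			/* existing right neighbour */

	/* Free "freed" extents between them: the neighbours disappear... */
	sum[log2_len(pre)]--;
	sum[log2_len(post)]--;
	/* ...and one merged free run of 16 extents appears. */
	sum[log2_len(pre + freed + post)]++;

	assert(sum[2] == 0 && sum[4] == 1);	/* 16 => log2 == 4 */
	return 0;
}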
*/ - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(postblock + 1 - preblock), - XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb); - return error; + return xfs_rtmodify_summary(args, + XFS_RTBLOCKLOG(postblock + 1 - preblock), + xfs_rtx_to_rbmblock(mp, preblock), 1); } /* @@ -762,43 +753,39 @@ xfs_rtfree_range( */ int xfs_rtcheck_range( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block number of extent */ - xfs_extlen_t len, /* length of extent */ - int val, /* 1 for free, 0 for allocated */ - xfs_rtblock_t *new, /* out: first block not matching */ - int *stat) /* out: 1 for matches, 0 for not */ + struct xfs_rtalloc_args *args, + xfs_rtxnum_t start, /* starting rtext number of extent */ + xfs_rtxlen_t len, /* length of extent */ + int val, /* 1 for free, 0 for allocated */ + xfs_rtxnum_t *new, /* out: first rtext not matching */ + int *stat) /* out: 1 for matches, 0 for not */ { - xfs_rtword_t *b; /* current word in buffer */ - int bit; /* bit number in the word */ - xfs_rtblock_t block; /* bitmap block number */ - struct xfs_buf *bp; /* buf for the block */ - xfs_rtword_t *bufp; /* starting word in buffer */ - int error; /* error value */ - xfs_rtblock_t i; /* current bit number rel. to start */ - xfs_rtblock_t lastbit; /* last useful bit in word */ - xfs_rtword_t mask; /* mask of relevant bits for value */ - xfs_rtword_t wdiff; /* difference from wanted value */ - int word; /* word number in the buffer */ + struct xfs_mount *mp = args->mp; + int bit; /* bit number in the word */ + xfs_fileoff_t block; /* bitmap block number */ + int error; + xfs_rtxnum_t i; /* current bit number rel. to start */ + xfs_rtxnum_t lastbit; /* last useful bit in word */ + xfs_rtword_t mask; /* mask of relevant bits for value */ + xfs_rtword_t wdiff; /* difference from wanted value */ + xfs_rtword_t incore; + unsigned int word; /* word number in the buffer */ /* * Compute starting bitmap block number */ - block = XFS_BITTOBLOCK(mp, start); + block = xfs_rtx_to_rbmblock(mp, start); /* * Read the bitmap block. */ - error = xfs_rtbuf_get(mp, tp, block, 0, &bp); - if (error) { + error = xfs_rtbitmap_read_buf(args, block); + if (error) return error; - } - bufp = bp->b_addr; + /* * Compute the starting word's address, and starting bit. */ - word = XFS_BITTOWORD(mp, start); - b = &bufp[word]; + word = xfs_rtx_to_rbmword(mp, start); bit = (int)(start & (XFS_NBWORD - 1)); /* * 0 (allocated) => all zero's; 1 (free) => all one's. @@ -820,11 +807,11 @@ xfs_rtcheck_range( /* * Compute difference between actual and desired value. */ - if ((wdiff = (*b ^ val) & mask)) { + incore = xfs_rtbitmap_getword(args, word); + if ((wdiff = (incore ^ val) & mask)) { /* * Different, compute first wrong bit and return. */ - xfs_trans_brelse(tp, bp); i = XFS_RTLOBIT(wdiff) - bit; *new = start + i; *stat = 0; @@ -835,22 +822,15 @@ xfs_rtcheck_range( * Go on to next block if that's where the next word is * and we need the next word. */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + if (++word == mp->m_blockwsize && i < len) { /* * If done with this block, get the next one. */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { + error = xfs_rtbitmap_read_buf(args, ++block); + if (error) return error; - } - b = bufp = bp->b_addr; + word = 0; - } else { - /* - * Go on to the next word in the buffer. 
- */ - b++; } } else { /* @@ -866,11 +846,11 @@ xfs_rtcheck_range( /* * Compute difference between actual and desired value. */ - if ((wdiff = *b ^ val)) { + incore = xfs_rtbitmap_getword(args, word); + if ((wdiff = incore ^ val)) { /* * Different, compute first wrong bit and return. */ - xfs_trans_brelse(tp, bp); i += XFS_RTLOBIT(wdiff); *new = start + i; *stat = 0; @@ -881,22 +861,15 @@ xfs_rtcheck_range( * Go on to next block if that's where the next word is * and we need the next word. */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + if (++word == mp->m_blockwsize && i < len) { /* * If done with this block, get the next one. */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { + error = xfs_rtbitmap_read_buf(args, ++block); + if (error) return error; - } - b = bufp = bp->b_addr; + word = 0; - } else { - /* - * Go on to the next word in the buffer. - */ - b++; } } /* @@ -911,11 +884,11 @@ xfs_rtcheck_range( /* * Compute difference between actual and desired value. */ - if ((wdiff = (*b ^ val) & mask)) { + incore = xfs_rtbitmap_getword(args, word); + if ((wdiff = (incore ^ val) & mask)) { /* * Different, compute first wrong bit and return. */ - xfs_trans_brelse(tp, bp); i += XFS_RTLOBIT(wdiff); *new = start + i; *stat = 0; @@ -926,7 +899,6 @@ xfs_rtcheck_range( /* * Successful, return. */ - xfs_trans_brelse(tp, bp); *new = start + i; *stat = 1; return 0; @@ -936,58 +908,57 @@ xfs_rtcheck_range( /* * Check that the given extent (block range) is allocated already. */ -STATIC int /* error */ +STATIC int xfs_rtcheck_alloc_range( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t bno, /* starting block number of extent */ - xfs_extlen_t len) /* length of extent */ + struct xfs_rtalloc_args *args, + xfs_rtxnum_t start, /* starting rtext number of extent */ + xfs_rtxlen_t len) /* length of extent */ { - xfs_rtblock_t new; /* dummy for xfs_rtcheck_range */ - int stat; - int error; + xfs_rtxnum_t new; /* dummy for xfs_rtcheck_range */ + int stat; + int error; - error = xfs_rtcheck_range(mp, tp, bno, len, 0, &new, &stat); + error = xfs_rtcheck_range(args, start, len, 0, &new, &stat); if (error) return error; ASSERT(stat); return 0; } #else -#define xfs_rtcheck_alloc_range(m,t,b,l) (0) +#define xfs_rtcheck_alloc_range(a,b,l) (0) #endif /* * Free an extent in the realtime subvolume. Length is expressed in * realtime extents, as is the block number. */ -int /* error */ +int xfs_rtfree_extent( - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t bno, /* starting block number to free */ - xfs_extlen_t len) /* length of extent freed */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_rtxnum_t start, /* starting rtext number to free */ + xfs_rtxlen_t len) /* length of extent freed */ { - int error; /* error value */ - xfs_mount_t *mp; /* file system mount structure */ - xfs_fsblock_t sb; /* summary file block number */ - struct xfs_buf *sumbp = NULL; /* summary file block buffer */ - struct timespec64 atime; - - mp = tp->t_mountp; + struct xfs_mount *mp = tp->t_mountp; + struct xfs_rtalloc_args args = { + .mp = mp, + .tp = tp, + }; + int error; + struct timespec64 atime; ASSERT(mp->m_rbmip->i_itemp != NULL); ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); - error = xfs_rtcheck_alloc_range(mp, tp, bno, len); + error = xfs_rtcheck_alloc_range(&args, start, len); if (error) return error; /* * Free the range of realtime blocks. 
*/ - error = xfs_rtfree_range(mp, tp, bno, len, &sumbp, &sb); - if (error) { - return error; - } + error = xfs_rtfree_range(&args, start, len); + if (error) + goto out; + /* * Mark more blocks free in the superblock. */ @@ -1002,11 +973,47 @@ xfs_rtfree_extent( mp->m_rbmip->i_diflags |= XFS_DIFLAG_NEWRTBM; atime = inode_get_atime(VFS_I(mp->m_rbmip)); - *((uint64_t *)&atime) = 0; + atime.tv_sec = 0; inode_set_atime_to_ts(VFS_I(mp->m_rbmip), atime); xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); } - return 0; + error = 0; +out: + xfs_rtbuf_cache_relse(&args); + return error; +} + +/* + * Free some blocks in the realtime subvolume. rtbno and rtlen are in units of + * rt blocks, not rt extents; must be aligned to the rt extent size; and rtlen + * cannot exceed XFS_MAX_BMBT_EXTLEN. + */ +int +xfs_rtfree_blocks( + struct xfs_trans *tp, + xfs_fsblock_t rtbno, + xfs_filblks_t rtlen) +{ + struct xfs_mount *mp = tp->t_mountp; + xfs_rtxnum_t start; + xfs_filblks_t len; + xfs_extlen_t mod; + + ASSERT(rtlen <= XFS_MAX_BMBT_EXTLEN); + + len = xfs_rtb_to_rtxrem(mp, rtlen, &mod); + if (mod) { + ASSERT(mod == 0); + return -EIO; + } + + start = xfs_rtb_to_rtxrem(mp, rtbno, &mod); + if (mod) { + ASSERT(mod == 0); + return -EIO; + } + + return xfs_rtfree_extent(tp, start, len); } /* Find all the free records within a given range. */ @@ -1019,10 +1026,14 @@ xfs_rtalloc_query_range( xfs_rtalloc_query_range_fn fn, void *priv) { + struct xfs_rtalloc_args args = { + .mp = mp, + .tp = tp, + }; struct xfs_rtalloc_rec rec; - xfs_rtblock_t rtstart; - xfs_rtblock_t rtend; - xfs_rtblock_t high_key; + xfs_rtxnum_t rtstart; + xfs_rtxnum_t rtend; + xfs_rtxnum_t high_key; int is_free; int error = 0; @@ -1038,13 +1049,13 @@ xfs_rtalloc_query_range( rtstart = low_rec->ar_startext; while (rtstart <= high_key) { /* Is the first block free? */ - error = xfs_rtcheck_range(mp, tp, rtstart, 1, 1, &rtend, + error = xfs_rtcheck_range(&args, rtstart, 1, 1, &rtend, &is_free); if (error) break; /* How long does the extent go for? */ - error = xfs_rtfind_forw(mp, tp, rtstart, high_key, &rtend); + error = xfs_rtfind_forw(&args, rtstart, high_key, &rtend); if (error) break; @@ -1060,6 +1071,7 @@ xfs_rtalloc_query_range( rtstart = rtend + 1; } + xfs_rtbuf_cache_relse(&args); return error; } @@ -1085,18 +1097,79 @@ int xfs_rtalloc_extent_is_free( struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, - xfs_extlen_t len, + xfs_rtxnum_t start, + xfs_rtxlen_t len, bool *is_free) { - xfs_rtblock_t end; + struct xfs_rtalloc_args args = { + .mp = mp, + .tp = tp, + }; + xfs_rtxnum_t end; int matches; int error; - error = xfs_rtcheck_range(mp, tp, start, len, 1, &end, &matches); + error = xfs_rtcheck_range(&args, start, len, 1, &end, &matches); + xfs_rtbuf_cache_relse(&args); if (error) return error; *is_free = matches; return 0; } + +/* + * Compute the number of rtbitmap blocks needed to track the given number of rt + * extents. + */ +xfs_filblks_t +xfs_rtbitmap_blockcount( + struct xfs_mount *mp, + xfs_rtbxlen_t rtextents) +{ + return howmany_64(rtextents, NBBY * mp->m_sb.sb_blocksize); +} + +/* + * Compute the number of rtbitmap words needed to populate every block of a + * bitmap that is large enough to track the given number of rt extents. 
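Aside: the geometry helpers above encode two facts: the bitmap spends one bit per rt extent, so a block of blocksize bytes covers 8 * blocksize extents, and the summary holds rsumlevels * rbmblocks four-byte counters. A standalone version of the same arithmetic, with howmany() as the usual round-up division; function names are example-local.

#include <assert.h>
#include <stdint.h>

#define NBBY		8
#define WORDLOG		2	/* log2(bytes per bitmap word) */
#define howmany(x, y)	(((x) + ((y) - 1)) / (y))

/* One bit per rt extent, rounded up to whole blocks. */
static uint64_t rtbitmap_blockcount(uint32_t blocksize, uint64_t rtextents)
{
	return howmany(rtextents, (uint64_t)NBBY * blocksize);
}

/* rsumlevels * rbmblocks counters of 4 bytes each, in whole blocks. */
static uint64_t rtsummary_blockcount(uint32_t blocksize,
		unsigned int rsumlevels, uint64_t rbmblocks)
{
	uint64_t words = (uint64_t)rsumlevels * rbmblocks;

	return howmany(words << WORDLOG, (uint64_t)blocksize);
}

int main(void)
{
	/* 4k blocks track 32768 extents each: 1M extents -> 32 blocks */
	assert(rtbitmap_blockcount(4096, 1048576) == 32);
	/* 16 levels * 32 blocks = 512 words = 2048 bytes -> 1 block */
	assert(rtsummary_blockcount(4096, 16, 32) == 1);
	return 0;
}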
+ */ +unsigned long long +xfs_rtbitmap_wordcount( + struct xfs_mount *mp, + xfs_rtbxlen_t rtextents) +{ + xfs_filblks_t blocks; + + blocks = xfs_rtbitmap_blockcount(mp, rtextents); + return XFS_FSB_TO_B(mp, blocks) >> XFS_WORDLOG; +} + +/* Compute the number of rtsummary blocks needed to track the given rt space. */ +xfs_filblks_t +xfs_rtsummary_blockcount( + struct xfs_mount *mp, + unsigned int rsumlevels, + xfs_extlen_t rbmblocks) +{ + unsigned long long rsumwords; + + rsumwords = (unsigned long long)rsumlevels * rbmblocks; + return XFS_B_TO_FSB(mp, rsumwords << XFS_WORDLOG); +} + +/* + * Compute the number of rtsummary info words needed to populate every block of + * a summary file that is large enough to track the given rt space. + */ +unsigned long long +xfs_rtsummary_wordcount( + struct xfs_mount *mp, + unsigned int rsumlevels, + xfs_extlen_t rbmblocks) +{ + xfs_filblks_t blocks; + + blocks = xfs_rtsummary_blockcount(mp, rsumlevels, rbmblocks); + return XFS_FSB_TO_B(mp, blocks) >> XFS_WORDLOG; +} diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h new file mode 100644 index 000000000000..c0637057d69c --- /dev/null +++ b/fs/xfs/libxfs/xfs_rtbitmap.h @@ -0,0 +1,383 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * All Rights Reserved. + */ +#ifndef __XFS_RTBITMAP_H__ +#define __XFS_RTBITMAP_H__ + +struct xfs_rtalloc_args { + struct xfs_mount *mp; + struct xfs_trans *tp; + + struct xfs_buf *rbmbp; /* bitmap block buffer */ + struct xfs_buf *sumbp; /* summary block buffer */ + + xfs_fileoff_t rbmoff; /* bitmap block number */ + xfs_fileoff_t sumoff; /* summary block number */ +}; + +static inline xfs_rtblock_t +xfs_rtx_to_rtb( + struct xfs_mount *mp, + xfs_rtxnum_t rtx) +{ + if (mp->m_rtxblklog >= 0) + return rtx << mp->m_rtxblklog; + + return rtx * mp->m_sb.sb_rextsize; +} + +static inline xfs_extlen_t +xfs_rtxlen_to_extlen( + struct xfs_mount *mp, + xfs_rtxlen_t rtxlen) +{ + if (mp->m_rtxblklog >= 0) + return rtxlen << mp->m_rtxblklog; + + return rtxlen * mp->m_sb.sb_rextsize; +} + +/* Compute the misalignment between an extent length and a realtime extent .*/ +static inline unsigned int +xfs_extlen_to_rtxmod( + struct xfs_mount *mp, + xfs_extlen_t len) +{ + if (mp->m_rtxblklog >= 0) + return len & mp->m_rtxblkmask; + + return len % mp->m_sb.sb_rextsize; +} + +static inline xfs_rtxlen_t +xfs_extlen_to_rtxlen( + struct xfs_mount *mp, + xfs_extlen_t len) +{ + if (mp->m_rtxblklog >= 0) + return len >> mp->m_rtxblklog; + + return len / mp->m_sb.sb_rextsize; +} + +/* Convert an rt block number into an rt extent number. */ +static inline xfs_rtxnum_t +xfs_rtb_to_rtx( + struct xfs_mount *mp, + xfs_rtblock_t rtbno) +{ + if (likely(mp->m_rtxblklog >= 0)) + return rtbno >> mp->m_rtxblklog; + + return div_u64(rtbno, mp->m_sb.sb_rextsize); +} + +/* Return the offset of an rt block number within an rt extent. */ +static inline xfs_extlen_t +xfs_rtb_to_rtxoff( + struct xfs_mount *mp, + xfs_rtblock_t rtbno) +{ + if (likely(mp->m_rtxblklog >= 0)) + return rtbno & mp->m_rtxblkmask; + + return do_div(rtbno, mp->m_sb.sb_rextsize); +} + +/* + * Crack an rt block number into an rt extent number and an offset within that + * rt extent. Returns the rt extent number directly and the offset in @off. 
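Aside: all of these converters share the same fast path: sb_rextsize is usually a power of two, so m_rtxblklog and m_rtxblkmask turn divisions into shifts and masks, with a 64-bit division fallback for odd-sized rt extents. A userspace sketch of the quotient/remainder split performed by xfs_rtb_to_rtxrem(); struct rtgeom and its fields are stand-ins for the mount fields, not kernel API.

#include <assert.h>
#include <stdint.h>

struct rtgeom {
	uint32_t	rextsize;	/* rt blocks per rt extent */
	int		rtxblklog;	/* log2(rextsize), or -1 */
	uint64_t	rtxblkmask;	/* rextsize - 1 when power of 2 */
};

static void rtgeom_init(struct rtgeom *g, uint32_t rextsize)
{
	g->rextsize = rextsize;
	if (rextsize && !(rextsize & (rextsize - 1))) {
		g->rtxblklog = __builtin_ctz(rextsize);
		g->rtxblkmask = rextsize - 1;
	} else {
		g->rtxblklog = -1;
		g->rtxblkmask = 0;
	}
}

/* Return the rt extent number; store the offset within it in *off. */
static uint64_t rtb_to_rtxrem(const struct rtgeom *g, uint64_t rtbno,
		uint32_t *off)
{
	if (g->rtxblklog >= 0) {
		*off = rtbno & g->rtxblkmask;	/* cheap mask and shift */
		return rtbno >> g->rtxblklog;
	}
	*off = rtbno % g->rextsize;		/* 64-bit division path */
	return rtbno / g->rextsize;
}

int main(void)
{
	struct rtgeom g;
	uint32_t off;

	rtgeom_init(&g, 16);
	assert(rtb_to_rtxrem(&g, 100, &off) == 6 && off == 4);
	rtgeom_init(&g, 20);
	assert(rtb_to_rtxrem(&g, 100, &off) == 5 && off == 0);
	return 0;
}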
+ */ +static inline xfs_rtxnum_t +xfs_rtb_to_rtxrem( + struct xfs_mount *mp, + xfs_rtblock_t rtbno, + xfs_extlen_t *off) +{ + if (likely(mp->m_rtxblklog >= 0)) { + *off = rtbno & mp->m_rtxblkmask; + return rtbno >> mp->m_rtxblklog; + } + + return div_u64_rem(rtbno, mp->m_sb.sb_rextsize, off); +} + +/* + * Convert an rt block number into an rt extent number, rounding up to the next + * rt extent if the rt block is not aligned to an rt extent boundary. + */ +static inline xfs_rtxnum_t +xfs_rtb_to_rtxup( + struct xfs_mount *mp, + xfs_rtblock_t rtbno) +{ + if (likely(mp->m_rtxblklog >= 0)) { + if (rtbno & mp->m_rtxblkmask) + return (rtbno >> mp->m_rtxblklog) + 1; + return rtbno >> mp->m_rtxblklog; + } + + if (do_div(rtbno, mp->m_sb.sb_rextsize)) + rtbno++; + return rtbno; +} + +/* Round this rtblock up to the nearest rt extent size. */ +static inline xfs_rtblock_t +xfs_rtb_roundup_rtx( + struct xfs_mount *mp, + xfs_rtblock_t rtbno) +{ + return roundup_64(rtbno, mp->m_sb.sb_rextsize); +} + +/* Round this rtblock down to the nearest rt extent size. */ +static inline xfs_rtblock_t +xfs_rtb_rounddown_rtx( + struct xfs_mount *mp, + xfs_rtblock_t rtbno) +{ + return rounddown_64(rtbno, mp->m_sb.sb_rextsize); +} + +/* Convert an rt extent number to a file block offset in the rt bitmap file. */ +static inline xfs_fileoff_t +xfs_rtx_to_rbmblock( + struct xfs_mount *mp, + xfs_rtxnum_t rtx) +{ + return rtx >> mp->m_blkbit_log; +} + +/* Convert an rt extent number to a word offset within an rt bitmap block. */ +static inline unsigned int +xfs_rtx_to_rbmword( + struct xfs_mount *mp, + xfs_rtxnum_t rtx) +{ + return (rtx >> XFS_NBWORDLOG) & (mp->m_blockwsize - 1); +} + +/* Convert a file block offset in the rt bitmap file to an rt extent number. */ +static inline xfs_rtxnum_t +xfs_rbmblock_to_rtx( + struct xfs_mount *mp, + xfs_fileoff_t rbmoff) +{ + return rbmoff << mp->m_blkbit_log; +} + +/* Return a pointer to a bitmap word within a rt bitmap block. */ +static inline union xfs_rtword_raw * +xfs_rbmblock_wordptr( + struct xfs_rtalloc_args *args, + unsigned int index) +{ + union xfs_rtword_raw *words = args->rbmbp->b_addr; + + return words + index; +} + +/* Convert an ondisk bitmap word to its incore representation. */ +static inline xfs_rtword_t +xfs_rtbitmap_getword( + struct xfs_rtalloc_args *args, + unsigned int index) +{ + union xfs_rtword_raw *word = xfs_rbmblock_wordptr(args, index); + + return word->old; +} + +/* Set an ondisk bitmap word from an incore representation. */ +static inline void +xfs_rtbitmap_setword( + struct xfs_rtalloc_args *args, + unsigned int index, + xfs_rtword_t value) +{ + union xfs_rtword_raw *word = xfs_rbmblock_wordptr(args, index); + + word->old = value; +} + +/* + * Convert a rt extent length and rt bitmap block number to a xfs_suminfo_t + * offset within the rt summary file. + */ +static inline xfs_rtsumoff_t +xfs_rtsumoffs( + struct xfs_mount *mp, + int log2_len, + xfs_fileoff_t rbmoff) +{ + return log2_len * mp->m_sb.sb_rbmblocks + rbmoff; +} + +/* + * Convert an xfs_suminfo_t offset to a file block offset within the rt summary + * file. + */ +static inline xfs_fileoff_t +xfs_rtsumoffs_to_block( + struct xfs_mount *mp, + xfs_rtsumoff_t rsumoff) +{ + return XFS_B_TO_FSBT(mp, rsumoff * sizeof(xfs_suminfo_t)); +} + +/* + * Convert an xfs_suminfo_t offset to an info word offset within an rt summary + * block. 
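Aside: taken together, the conversions above decompose an rt extent number into (bitmap block, word within block, bit within word). A self-contained illustration using a hypothetical 4k-block geometry, 32768 bits per bitmap block and 1024 words per block; struct bitloc is local to the sketch.

#include <assert.h>
#include <stdint.h>

#define NBWORDLOG 5	/* log2(32 bits per word) */

struct bitloc {
	uint64_t	block;	/* block in the bitmap file */
	unsigned int	word;	/* word within that block */
	unsigned int	bit;	/* bit within that word */
};

static struct bitloc rtx_to_bitloc(unsigned int blkbit_log,
		unsigned int blockwsize, uint64_t rtx)
{
	struct bitloc l;

	l.block = rtx >> blkbit_log;		/* bits tracked per block */
	l.word = (rtx >> NBWORDLOG) & (blockwsize - 1);
	l.bit = rtx & 31;
	return l;
}

int main(void)
{
	/* 4k blocks: blkbit_log is 15, 1024 words per block */
	struct bitloc l = rtx_to_bitloc(15, 1024, 32768 + 65);

	assert(l.block == 1 && l.word == 2 && l.bit == 1);
	return 0;
}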
+ */ +static inline unsigned int +xfs_rtsumoffs_to_infoword( + struct xfs_mount *mp, + xfs_rtsumoff_t rsumoff) +{ + unsigned int mask = mp->m_blockmask >> XFS_SUMINFOLOG; + + return rsumoff & mask; +} + +/* Return a pointer to a summary info word within a rt summary block. */ +static inline union xfs_suminfo_raw * +xfs_rsumblock_infoptr( + struct xfs_rtalloc_args *args, + unsigned int index) +{ + union xfs_suminfo_raw *info = args->sumbp->b_addr; + + return info + index; +} + +/* Get the current value of a summary counter. */ +static inline xfs_suminfo_t +xfs_suminfo_get( + struct xfs_rtalloc_args *args, + unsigned int index) +{ + union xfs_suminfo_raw *info = xfs_rsumblock_infoptr(args, index); + + return info->old; +} + +/* Add to the current value of a summary counter and return the new value. */ +static inline xfs_suminfo_t +xfs_suminfo_add( + struct xfs_rtalloc_args *args, + unsigned int index, + int delta) +{ + union xfs_suminfo_raw *info = xfs_rsumblock_infoptr(args, index); + + info->old += delta; + return info->old; +} + +/* + * Functions for walking free space rtextents in the realtime bitmap. + */ +struct xfs_rtalloc_rec { + xfs_rtxnum_t ar_startext; + xfs_rtbxlen_t ar_extcount; +}; + +typedef int (*xfs_rtalloc_query_range_fn)( + struct xfs_mount *mp, + struct xfs_trans *tp, + const struct xfs_rtalloc_rec *rec, + void *priv); + +#ifdef CONFIG_XFS_RT +void xfs_rtbuf_cache_relse(struct xfs_rtalloc_args *args); + +int xfs_rtbuf_get(struct xfs_rtalloc_args *args, xfs_fileoff_t block, + int issum); + +static inline int +xfs_rtbitmap_read_buf( + struct xfs_rtalloc_args *args, + xfs_fileoff_t block) +{ + return xfs_rtbuf_get(args, block, 0); +} + +static inline int +xfs_rtsummary_read_buf( + struct xfs_rtalloc_args *args, + xfs_fileoff_t block) +{ + return xfs_rtbuf_get(args, block, 1); +} + +int xfs_rtcheck_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, + xfs_rtxlen_t len, int val, xfs_rtxnum_t *new, int *stat); +int xfs_rtfind_back(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, + xfs_rtxnum_t limit, xfs_rtxnum_t *rtblock); +int xfs_rtfind_forw(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, + xfs_rtxnum_t limit, xfs_rtxnum_t *rtblock); +int xfs_rtmodify_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, + xfs_rtxlen_t len, int val); +int xfs_rtmodify_summary_int(struct xfs_rtalloc_args *args, int log, + xfs_fileoff_t bbno, int delta, xfs_suminfo_t *sum); +int xfs_rtmodify_summary(struct xfs_rtalloc_args *args, int log, + xfs_fileoff_t bbno, int delta); +int xfs_rtfree_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, + xfs_rtxlen_t len); +int xfs_rtalloc_query_range(struct xfs_mount *mp, struct xfs_trans *tp, + const struct xfs_rtalloc_rec *low_rec, + const struct xfs_rtalloc_rec *high_rec, + xfs_rtalloc_query_range_fn fn, void *priv); +int xfs_rtalloc_query_all(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_rtalloc_query_range_fn fn, + void *priv); +int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_rtxnum_t start, xfs_rtxlen_t len, + bool *is_free); +/* + * Free an extent in the realtime subvolume. Length is expressed in + * realtime extents, as is the block number. + */ +int /* error */ +xfs_rtfree_extent( + struct xfs_trans *tp, /* transaction pointer */ + xfs_rtxnum_t start, /* starting rtext number to free */ + xfs_rtxlen_t len); /* length of extent freed */ + +/* Same as above, but in units of rt blocks. 
*/ +int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno, + xfs_filblks_t rtlen); + +xfs_filblks_t xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t + rtextents); +unsigned long long xfs_rtbitmap_wordcount(struct xfs_mount *mp, + xfs_rtbxlen_t rtextents); + +xfs_filblks_t xfs_rtsummary_blockcount(struct xfs_mount *mp, + unsigned int rsumlevels, xfs_extlen_t rbmblocks); +unsigned long long xfs_rtsummary_wordcount(struct xfs_mount *mp, + unsigned int rsumlevels, xfs_extlen_t rbmblocks); +#else /* CONFIG_XFS_RT */ +# define xfs_rtfree_extent(t,b,l) (-ENOSYS) +# define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS) +# define xfs_rtalloc_query_range(m,t,l,h,f,p) (-ENOSYS) +# define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS) +# define xfs_rtbitmap_read_buf(a,b) (-ENOSYS) +# define xfs_rtsummary_read_buf(a,b) (-ENOSYS) +# define xfs_rtbuf_cache_relse(a) (0) +# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS) +static inline xfs_filblks_t +xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents) +{ + /* shut up gcc */ + return 0; +} +# define xfs_rtbitmap_wordcount(mp, r) (0) +# define xfs_rtsummary_blockcount(mp, l, b) (0) +# define xfs_rtsummary_wordcount(mp, l, b) (0) +#endif /* CONFIG_XFS_RT */ + +#endif /* __XFS_RTBITMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 6264daaab37b..1f74d0cd1618 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -975,6 +975,8 @@ xfs_sb_mount_common( mp->m_blockmask = sbp->sb_blocksize - 1; mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; mp->m_blockwmask = mp->m_blockwsize - 1; + mp->m_rtxblklog = log2_if_power2(sbp->sb_rextsize); + mp->m_rtxblkmask = mask64_if_power2(sbp->sb_rextsize); mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1); mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0); diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index a5e14740ec9a..19134b23c10b 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -25,7 +25,7 @@ extern uint64_t xfs_sb_version_to_features(struct xfs_sb *sbp); extern int xfs_update_secondary_sbs(struct xfs_mount *mp); -#define XFS_FS_GEOM_MAX_STRUCT_VER (4) +#define XFS_FS_GEOM_MAX_STRUCT_VER (5) extern void xfs_fs_geometry(struct xfs_mount *mp, struct xfs_fsop_geom *geo, int struct_version); extern int xfs_sb_read_secondary(struct xfs_mount *mp, diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 5b2f27cbdb80..6cd45e8c118d 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -19,6 +19,7 @@ #include "xfs_trans.h" #include "xfs_qm.h" #include "xfs_trans_space.h" +#include "xfs_rtbitmap.h" #define _ALLOC true #define _FREE false @@ -217,11 +218,12 @@ xfs_rtalloc_block_count( struct xfs_mount *mp, unsigned int num_ops) { - unsigned int blksz = XFS_FSB_TO_B(mp, 1); - unsigned int rtbmp_bytes; + unsigned int rtbmp_blocks; + xfs_rtxlen_t rtxlen; - rtbmp_bytes = (XFS_MAX_BMBT_EXTLEN / mp->m_sb.sb_rextsize) / NBBY; - return (howmany(rtbmp_bytes, blksz) + 1) * num_ops; + rtxlen = xfs_extlen_to_rtxlen(mp, XFS_MAX_BMBT_EXTLEN); + rtbmp_blocks = xfs_rtbitmap_blockcount(mp, rtxlen); + return (rtbmp_blocks + 1) * num_ops; } /* diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c index 5c2765934732..c299b16c9365 100644 --- a/fs/xfs/libxfs/xfs_types.c +++ b/fs/xfs/libxfs/xfs_types.c @@ -148,10 +148,10 @@ xfs_verify_rtbno( /* Verify that a realtime device extent is fully contained inside the volume. 
*/ bool -xfs_verify_rtext( +xfs_verify_rtbext( struct xfs_mount *mp, xfs_rtblock_t rtbno, - xfs_rtblock_t len) + xfs_filblks_t len) { if (rtbno + len <= rtbno) return false; diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 851220021484..533200c4ccc2 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -11,6 +11,7 @@ typedef uint32_t prid_t; /* project ID */ typedef uint32_t xfs_agblock_t; /* blockno in alloc. group */ typedef uint32_t xfs_agino_t; /* inode # within allocation grp */ typedef uint32_t xfs_extlen_t; /* extent length in blocks */ +typedef uint32_t xfs_rtxlen_t; /* file extent length in rtextents */ typedef uint32_t xfs_agnumber_t; /* allocation group number */ typedef uint64_t xfs_extnum_t; /* # of extents in a file */ typedef uint32_t xfs_aextnum_t; /* # extents in an attribute fork */ @@ -18,6 +19,7 @@ typedef int64_t xfs_fsize_t; /* bytes in a file */ typedef uint64_t xfs_ufsize_t; /* unsigned bytes in a file */ typedef int32_t xfs_suminfo_t; /* type of bitmap summary info */ +typedef uint32_t xfs_rtsumoff_t; /* offset of an rtsummary info word */ typedef uint32_t xfs_rtword_t; /* word type for bitmap manipulations */ typedef int64_t xfs_lsn_t; /* log sequence number */ @@ -31,6 +33,8 @@ typedef uint64_t xfs_rfsblock_t; /* blockno in filesystem (raw) */ typedef uint64_t xfs_rtblock_t; /* extent (block) in realtime area */ typedef uint64_t xfs_fileoff_t; /* block number in a file */ typedef uint64_t xfs_filblks_t; /* number of blocks in a file */ +typedef uint64_t xfs_rtxnum_t; /* rtextent number */ +typedef uint64_t xfs_rtbxlen_t; /* rtbitmap extent length in rtextents */ typedef int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */ @@ -47,6 +51,7 @@ typedef void * xfs_failaddr_t; #define NULLRFSBLOCK ((xfs_rfsblock_t)-1) #define NULLRTBLOCK ((xfs_rtblock_t)-1) #define NULLFILEOFF ((xfs_fileoff_t)-1) +#define NULLRTEXTNO ((xfs_rtxnum_t)-1) #define NULLAGBLOCK ((xfs_agblock_t)-1) #define NULLAGNUMBER ((xfs_agnumber_t)-1) @@ -145,6 +150,7 @@ typedef uint32_t xfs_dqid_t; */ #define XFS_NBBYLOG 3 /* log2(NBBY) */ #define XFS_WORDLOG 2 /* log2(sizeof(xfs_rtword_t)) */ +#define XFS_SUMINFOLOG 2 /* log2(sizeof(xfs_suminfo_t)) */ #define XFS_NBWORDLOG (XFS_NBBYLOG + XFS_WORDLOG) #define XFS_NBWORD (1 << XFS_NBWORDLOG) #define XFS_WORDMASK ((1 << XFS_WORDLOG) - 1) @@ -229,8 +235,8 @@ bool xfs_verify_ino(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_verify_dir_ino(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno); -bool xfs_verify_rtext(struct xfs_mount *mp, xfs_rtblock_t rtbno, - xfs_rtblock_t len); +bool xfs_verify_rtbext(struct xfs_mount *mp, xfs_rtblock_t rtbno, + xfs_filblks_t len); bool xfs_verify_icount(struct xfs_mount *mp, unsigned long long icount); bool xfs_verify_dablk(struct xfs_mount *mp, xfs_fileoff_t off); void xfs_icount_range(struct xfs_mount *mp, unsigned long long *min, diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 75588915572e..06d8c1996a33 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -410,7 +410,7 @@ xchk_bmap_iextent( /* Make sure the extent points to a valid place. 
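Aside: the verifier's first test, rtbno + len <= rtbno, is an overflow guard: it rejects both zero-length extents and ranges whose end wraps past 2^64. A minimal sketch of the same overflow-safe range check; the function name is invented for the example.

#include <assert.h>
#include <stdint.h>

/* Nonzero when [start, start + len) lies entirely within nblocks. */
static int verify_range(uint64_t start, uint64_t len, uint64_t nblocks)
{
	if (start + len <= start)	/* catches len == 0 and overflow */
		return 0;
	return start + len - 1 < nblocks;
}

int main(void)
{
	assert(verify_range(10, 5, 100));
	assert(!verify_range(10, 0, 100));		/* empty */
	assert(!verify_range(UINT64_MAX - 2, 5, 100));	/* wraps */
	return 0;
}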
*/ if (info->is_rt && - !xfs_verify_rtext(mp, irec->br_startblock, irec->br_blockcount)) + !xfs_verify_rtbext(mp, irec->br_startblock, irec->br_blockcount)) xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); if (!info->is_rt && diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index 05be757668bb..5799e9a94f1f 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c @@ -16,7 +16,7 @@ #include "xfs_health.h" #include "xfs_btree.h" #include "xfs_ag.h" -#include "xfs_rtalloc.h" +#include "xfs_rtbitmap.h" #include "xfs_inode.h" #include "xfs_icache.h" #include "scrub/scrub.h" diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 59d7912fb75f..889f556bc98f 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -20,6 +20,7 @@ #include "xfs_reflink.h" #include "xfs_rmap.h" #include "xfs_bmap_util.h" +#include "xfs_rtbitmap.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/btree.h" @@ -225,7 +226,7 @@ xchk_inode_extsize( */ if ((flags & XFS_DIFLAG_RTINHERIT) && (flags & XFS_DIFLAG_EXTSZINHERIT) && - value % sc->mp->m_sb.sb_rextsize > 0) + xfs_extlen_to_rtxmod(sc->mp, value) > 0) xchk_ino_set_warning(sc, ino); } diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index 008ddb599e13..41a1d89ae8e6 100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -11,7 +11,7 @@ #include "xfs_mount.h" #include "xfs_log_format.h" #include "xfs_trans.h" -#include "xfs_rtalloc.h" +#include "xfs_rtbitmap.h" #include "xfs_inode.h" #include "xfs_bmap.h" #include "scrub/scrub.h" @@ -48,12 +48,12 @@ xchk_rtbitmap_rec( { struct xfs_scrub *sc = priv; xfs_rtblock_t startblock; - xfs_rtblock_t blockcount; + xfs_filblks_t blockcount; - startblock = rec->ar_startext * mp->m_sb.sb_rextsize; - blockcount = rec->ar_extcount * mp->m_sb.sb_rextsize; + startblock = xfs_rtx_to_rtb(mp, rec->ar_startext); + blockcount = xfs_rtx_to_rtb(mp, rec->ar_extcount); - if (!xfs_verify_rtext(mp, startblock, blockcount)) + if (!xfs_verify_rtbext(mp, startblock, blockcount)) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); return 0; } @@ -128,26 +128,22 @@ out: void xchk_xref_is_used_rt_space( struct xfs_scrub *sc, - xfs_rtblock_t fsbno, + xfs_rtblock_t rtbno, xfs_extlen_t len) { - xfs_rtblock_t startext; - xfs_rtblock_t endext; - xfs_rtblock_t extcount; + xfs_rtxnum_t startext; + xfs_rtxnum_t endext; bool is_free; int error; if (xchk_skip_xref(sc->sm)) return; - startext = fsbno; - endext = fsbno + len - 1; - do_div(startext, sc->mp->m_sb.sb_rextsize); - do_div(endext, sc->mp->m_sb.sb_rextsize); - extcount = endext - startext + 1; + startext = xfs_rtb_to_rtx(sc->mp, rtbno); + endext = xfs_rtb_to_rtx(sc->mp, rtbno + len - 1); xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); - error = xfs_rtalloc_extent_is_free(sc->mp, sc->tp, startext, extcount, - &is_free); + error = xfs_rtalloc_extent_is_free(sc->mp, sc->tp, startext, + endext - startext + 1, &is_free); if (!xchk_should_check_xref(sc, &error, NULL)) goto out_unlock; if (is_free) diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c index 437ed9acbb27..8b15c47408d0 100644 --- a/fs/xfs/scrub/rtsummary.c +++ b/fs/xfs/scrub/rtsummary.c @@ -13,7 +13,7 @@ #include "xfs_inode.h" #include "xfs_log_format.h" #include "xfs_trans.h" -#include "xfs_rtalloc.h" +#include "xfs_rtbitmap.h" #include "xfs_bit.h" #include "xfs_bmap.h" #include "scrub/scrub.h" @@ -81,34 +81,45 @@ typedef unsigned int xchk_rtsumoff_t; static inline int xfsum_load( struct xfs_scrub *sc, - 
xchk_rtsumoff_t sumoff, - xfs_suminfo_t *info) + xfs_rtsumoff_t sumoff, + union xfs_suminfo_raw *rawinfo) { - return xfile_obj_load(sc->xfile, info, sizeof(xfs_suminfo_t), + return xfile_obj_load(sc->xfile, rawinfo, + sizeof(union xfs_suminfo_raw), sumoff << XFS_WORDLOG); } static inline int xfsum_store( struct xfs_scrub *sc, - xchk_rtsumoff_t sumoff, - const xfs_suminfo_t info) + xfs_rtsumoff_t sumoff, + const union xfs_suminfo_raw rawinfo) { - return xfile_obj_store(sc->xfile, &info, sizeof(xfs_suminfo_t), + return xfile_obj_store(sc->xfile, &rawinfo, + sizeof(union xfs_suminfo_raw), sumoff << XFS_WORDLOG); } static inline int xfsum_copyout( struct xfs_scrub *sc, - xchk_rtsumoff_t sumoff, - xfs_suminfo_t *info, + xfs_rtsumoff_t sumoff, + union xfs_suminfo_raw *rawinfo, unsigned int nr_words) { - return xfile_obj_load(sc->xfile, info, nr_words << XFS_WORDLOG, + return xfile_obj_load(sc->xfile, rawinfo, nr_words << XFS_WORDLOG, sumoff << XFS_WORDLOG); } +static inline xfs_suminfo_t +xchk_rtsum_inc( + struct xfs_mount *mp, + union xfs_suminfo_raw *v) +{ + v->old += 1; + return v->old; +} + /* Update the summary file to reflect the free extent that we've accumulated. */ STATIC int xchk_rtsum_record_free( @@ -121,23 +132,24 @@ xchk_rtsum_record_free( xfs_fileoff_t rbmoff; xfs_rtblock_t rtbno; xfs_filblks_t rtlen; - xchk_rtsumoff_t offs; + xfs_rtsumoff_t offs; unsigned int lenlog; - xfs_suminfo_t v = 0; + union xfs_suminfo_raw v; + xfs_suminfo_t value; int error = 0; if (xchk_should_terminate(sc, &error)) return error; /* Compute the relevant location in the rtsum file. */ - rbmoff = XFS_BITTOBLOCK(mp, rec->ar_startext); + rbmoff = xfs_rtx_to_rbmblock(mp, rec->ar_startext); lenlog = XFS_RTBLOCKLOG(rec->ar_extcount); - offs = XFS_SUMOFFS(mp, lenlog, rbmoff); + offs = xfs_rtsumoffs(mp, lenlog, rbmoff); - rtbno = rec->ar_startext * mp->m_sb.sb_rextsize; - rtlen = rec->ar_extcount * mp->m_sb.sb_rextsize; + rtbno = xfs_rtx_to_rtb(mp, rec->ar_startext); + rtlen = xfs_rtx_to_rtb(mp, rec->ar_extcount); - if (!xfs_verify_rtext(mp, rtbno, rtlen)) { + if (!xfs_verify_rtbext(mp, rtbno, rtlen)) { xchk_ino_xref_set_corrupt(sc, mp->m_rbmip->i_ino); return -EFSCORRUPTED; } @@ -147,9 +159,9 @@ xchk_rtsum_record_free( if (error) return error; - v++; + value = xchk_rtsum_inc(sc->mp, &v); trace_xchk_rtsum_record_free(mp, rec->ar_startext, rec->ar_extcount, - lenlog, offs, v); + lenlog, offs, value); return xfsum_store(sc, offs, v); } @@ -160,12 +172,11 @@ xchk_rtsum_compute( struct xfs_scrub *sc) { struct xfs_mount *mp = sc->mp; - unsigned long long rtbmp_bytes; + unsigned long long rtbmp_blocks; /* If the bitmap size doesn't match the computed size, bail. 
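Aside: the xfsum_load/xfsum_store pair treats the scrub xfile as a flat array of 4-byte summary words addressed at byte offset sumoff << XFS_WORDLOG; the recomputed copy is later compared block-by-block against the ondisk summary. A small in-memory model of that flow, with a plain byte array standing in for the xfile:

#include <assert.h>
#include <stdint.h>
#include <string.h>

#define WORDLOG 2
#define WORDS	8

static uint8_t xfile[WORDS << WORDLOG];		/* flat byte store */

static void xfsum_store(uint32_t sumoff, uint32_t v)
{
	memcpy(xfile + (sumoff << WORDLOG), &v, sizeof(v));
}

static uint32_t xfsum_load(uint32_t sumoff)
{
	uint32_t v;

	memcpy(&v, xfile + (sumoff << WORDLOG), sizeof(v));
	return v;
}

int main(void)
{
	uint32_t ondisk[WORDS] = { 0, 3, 0, 0, 1, 0, 0, 0 };

	xfsum_store(1, xfsum_load(1) + 3);	/* three free runs at log 1 */
	xfsum_store(4, xfsum_load(4) + 1);	/* one free run at log 4 */
	assert(memcmp(xfile, ondisk, sizeof(ondisk)) == 0);
	return 0;
}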
*/ - rtbmp_bytes = howmany_64(mp->m_sb.sb_rextents, NBBY); - if (roundup_64(rtbmp_bytes, mp->m_sb.sb_blocksize) != - mp->m_rbmip->i_disk_size) + rtbmp_blocks = xfs_rtbitmap_blockcount(mp, mp->m_sb.sb_rextents); + if (XFS_FSB_TO_B(mp, rtbmp_blocks) != mp->m_rbmip->i_disk_size) return -EFSCORRUPTED; return xfs_rtalloc_query_all(sc->mp, sc->tp, xchk_rtsum_record_free, @@ -177,14 +188,18 @@ STATIC int xchk_rtsum_compare( struct xfs_scrub *sc) { + struct xfs_rtalloc_args args = { + .mp = sc->mp, + .tp = sc->tp, + }; struct xfs_mount *mp = sc->mp; - struct xfs_buf *bp; struct xfs_bmbt_irec map; xfs_fileoff_t off; xchk_rtsumoff_t sumoff = 0; int nmap; for (off = 0; off < XFS_B_TO_FSB(mp, mp->m_rsumsize); off++) { + union xfs_suminfo_raw *ondisk_info; int error = 0; if (xchk_should_terminate(sc, &error)) @@ -205,22 +220,23 @@ xchk_rtsum_compare( } /* Read a block's worth of ondisk rtsummary file. */ - error = xfs_rtbuf_get(mp, sc->tp, off, 1, &bp); + error = xfs_rtsummary_read_buf(&args, off); if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, off, &error)) return error; /* Read a block's worth of computed rtsummary file. */ error = xfsum_copyout(sc, sumoff, sc->buf, mp->m_blockwsize); if (error) { - xfs_trans_brelse(sc->tp, bp); + xfs_rtbuf_cache_relse(&args); return error; } - if (memcmp(bp->b_addr, sc->buf, + ondisk_info = xfs_rsumblock_infoptr(&args, 0); + if (memcmp(ondisk_info, sc->buf, mp->m_blockwsize << XFS_WORDLOG) != 0) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, off); - xfs_trans_brelse(sc->tp, bp); + xfs_rtbuf_cache_relse(&args); sumoff += mp->m_blockwsize; } diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index 46249e7b17e0..29afa4851235 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -13,6 +13,7 @@ #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_ag.h" +#include "xfs_rtbitmap.h" #include "scrub/scrub.h" #include "scrub/xfile.h" #include "scrub/xfarray.h" diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index cbd4d01e253c..4a8bc6f3c8f2 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -1036,17 +1036,18 @@ TRACE_EVENT(xfarray_sort_stats, #ifdef CONFIG_XFS_RT TRACE_EVENT(xchk_rtsum_record_free, - TP_PROTO(struct xfs_mount *mp, xfs_rtblock_t start, - uint64_t len, unsigned int log, loff_t pos, xfs_suminfo_t v), - TP_ARGS(mp, start, len, log, pos, v), + TP_PROTO(struct xfs_mount *mp, xfs_rtxnum_t start, + xfs_rtbxlen_t len, unsigned int log, loff_t pos, + xfs_suminfo_t value), + TP_ARGS(mp, start, len, log, pos, value), TP_STRUCT__entry( __field(dev_t, dev) __field(dev_t, rtdev) - __field(xfs_rtblock_t, start) + __field(xfs_rtxnum_t, start) __field(unsigned long long, len) __field(unsigned int, log) __field(loff_t, pos) - __field(xfs_suminfo_t, v) + __field(xfs_suminfo_t, value) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; @@ -1055,7 +1056,7 @@ TRACE_EVENT(xchk_rtsum_record_free, __entry->len = len; __entry->log = log; __entry->pos = pos; - __entry->v = v; + __entry->value = value; ), TP_printk("dev %d:%d rtdev %d:%d rtx 0x%llx rtxcount 0x%llx log %u rsumpos 0x%llx sumcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), @@ -1064,7 +1065,7 @@ TRACE_EVENT(xchk_rtsum_record_free, __entry->len, __entry->log, __entry->pos, - __entry->v) + __entry->value) ); #endif /* CONFIG_XFS_RT */ diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 40e0a1f1f753..731260a5af6d 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -28,6 +28,7 @@ #include "xfs_icache.h" #include "xfs_iomap.h" #include "xfs_reflink.h" 
+#include "xfs_rtbitmap.h" /* Kernel only BMAP related definitions and functions */ @@ -75,28 +76,28 @@ xfs_bmap_rtalloc( { struct xfs_mount *mp = ap->ip->i_mount; xfs_fileoff_t orig_offset = ap->offset; - xfs_rtblock_t rtb; - xfs_extlen_t prod = 0; /* product factor for allocators */ + xfs_rtxnum_t rtx; + xfs_rtxlen_t prod = 0; /* product factor for allocators */ xfs_extlen_t mod = 0; /* product factor for allocators */ - xfs_extlen_t ralen = 0; /* realtime allocation length */ + xfs_rtxlen_t ralen = 0; /* realtime allocation length */ xfs_extlen_t align; /* minimum allocation alignment */ xfs_extlen_t orig_length = ap->length; xfs_extlen_t minlen = mp->m_sb.sb_rextsize; - xfs_extlen_t raminlen; + xfs_rtxlen_t raminlen; bool rtlocked = false; bool ignore_locality = false; int error; align = xfs_get_extsz_hint(ap->ip); retry: - prod = align / mp->m_sb.sb_rextsize; + prod = xfs_extlen_to_rtxlen(mp, align); error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 1, ap->eof, 0, ap->conv, &ap->offset, &ap->length); if (error) return error; ASSERT(ap->length); - ASSERT(ap->length % mp->m_sb.sb_rextsize == 0); + ASSERT(xfs_extlen_to_rtxmod(mp, ap->length) == 0); /* * If we shifted the file offset downward to satisfy an extent size @@ -116,17 +117,14 @@ retry: prod = 1; /* * Set ralen to be the actual requested length in rtextents. - */ - ralen = ap->length / mp->m_sb.sb_rextsize; - /* + * * If the old value was close enough to XFS_BMBT_MAX_EXTLEN that * we rounded up to it, cut it back so it's valid again. * Note that if it's a really large request (bigger than * XFS_BMBT_MAX_EXTLEN), we don't hear about that number, and can't * adjust the starting point to match it. */ - if (ralen * mp->m_sb.sb_rextsize >= XFS_MAX_BMBT_EXTLEN) - ralen = XFS_MAX_BMBT_EXTLEN / mp->m_sb.sb_rextsize; + ralen = xfs_extlen_to_rtxlen(mp, min(ap->length, XFS_MAX_BMBT_EXTLEN)); /* * Lock out modifications to both the RT bitmap and summary inodes @@ -144,12 +142,10 @@ retry: * pick an extent that will space things out in the rt area. */ if (ap->eof && ap->offset == 0) { - xfs_rtblock_t rtx; /* realtime extent no */ - error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx); if (error) return error; - ap->blkno = rtx * mp->m_sb.sb_rextsize; + ap->blkno = xfs_rtx_to_rtb(mp, rtx); } else { ap->blkno = 0; } @@ -160,20 +156,18 @@ retry: * Realtime allocation, done through xfs_rtallocate_extent. 
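All lengths and block numbers passed to the allocator here are in units of realtime extents; the winning extent is converted back to filesystem blocks before it is stored in the mapping.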
*/ if (ignore_locality) - ap->blkno = 0; + rtx = 0; else - do_div(ap->blkno, mp->m_sb.sb_rextsize); - rtb = ap->blkno; - ap->length = ralen; - raminlen = max_t(xfs_extlen_t, 1, minlen / mp->m_sb.sb_rextsize); - error = xfs_rtallocate_extent(ap->tp, ap->blkno, raminlen, ap->length, - &ralen, ap->wasdel, prod, &rtb); + rtx = xfs_rtb_to_rtx(mp, ap->blkno); + raminlen = max_t(xfs_rtxlen_t, 1, xfs_extlen_to_rtxlen(mp, minlen)); + error = xfs_rtallocate_extent(ap->tp, rtx, raminlen, ralen, &ralen, + ap->wasdel, prod, &rtx); if (error) return error; - if (rtb != NULLRTBLOCK) { - ap->blkno = rtb * mp->m_sb.sb_rextsize; - ap->length = ralen * mp->m_sb.sb_rextsize; + if (rtx != NULLRTEXTNO) { + ap->blkno = xfs_rtx_to_rtb(mp, rtx); + ap->length = xfs_rtxlen_to_extlen(mp, ralen); ap->ip->i_nblocks += ap->length; xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); if (ap->wasdel) @@ -690,7 +684,7 @@ xfs_can_free_eofblocks( */ end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip)); if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1) - end_fsb = roundup_64(end_fsb, mp->m_sb.sb_rextsize); + end_fsb = xfs_rtb_roundup_rtx(mp, end_fsb); last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); if (last_fsb <= end_fsb) return false; @@ -780,12 +774,10 @@ xfs_alloc_file_space( { xfs_mount_t *mp = ip->i_mount; xfs_off_t count; - xfs_filblks_t allocated_fsb; xfs_filblks_t allocatesize_fsb; xfs_extlen_t extsz, temp; xfs_fileoff_t startoffset_fsb; xfs_fileoff_t endoffset_fsb; - int nimaps; int rt; xfs_trans_t *tp; xfs_bmbt_irec_t imaps[1], *imapp; @@ -808,7 +800,6 @@ xfs_alloc_file_space( count = len; imapp = &imaps[0]; - nimaps = 1; startoffset_fsb = XFS_B_TO_FSBT(mp, offset); endoffset_fsb = XFS_B_TO_FSB(mp, offset + count); allocatesize_fsb = endoffset_fsb - startoffset_fsb; @@ -819,6 +810,7 @@ xfs_alloc_file_space( while (allocatesize_fsb && !error) { xfs_fileoff_t s, e; unsigned int dblocks, rblocks, resblks; + int nimaps = 1; /* * Determine space reservations for data/realtime. @@ -884,15 +876,19 @@ xfs_alloc_file_space( if (error) break; - allocated_fsb = imapp->br_blockcount; - - if (nimaps == 0) { - error = -ENOSPC; - break; + /* + * If the allocator cannot find a single free extent large + * enough to cover the start block of the requested range, + * xfs_bmapi_write will return 0 but leave *nimaps set to 0. + * + * In that case we simply need to keep looping with the same + * startoffset_fsb so that one of the following allocations + * will eventually reach the requested range. + */ + if (nimaps) { + startoffset_fsb += imapp->br_blockcount; + allocatesize_fsb -= imapp->br_blockcount; } - - startoffset_fsb += allocated_fsb; - allocatesize_fsb -= allocated_fsb; } return error; @@ -989,10 +985,8 @@ xfs_free_file_space( /* We can only free complete realtime extents. 
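Round the start offset up and the end offset down to realtime extent boundaries, so any partial extents at the edges of the range stay allocated.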
*/ if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1) { - startoffset_fsb = roundup_64(startoffset_fsb, - mp->m_sb.sb_rextsize); - endoffset_fsb = rounddown_64(endoffset_fsb, - mp->m_sb.sb_rextsize); + startoffset_fsb = xfs_rtb_roundup_rtx(mp, startoffset_fsb); + endoffset_fsb = xfs_rtb_rounddown_rtx(mp, endoffset_fsb); } /* diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 203700278ddb..e33e5e13b95f 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -214,6 +214,43 @@ xfs_ilock_iocb( return 0; } +static int +xfs_ilock_iocb_for_write( + struct kiocb *iocb, + unsigned int *lock_mode) +{ + ssize_t ret; + struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp)); + + ret = xfs_ilock_iocb(iocb, *lock_mode); + if (ret) + return ret; + + if (*lock_mode == XFS_IOLOCK_EXCL) + return 0; + if (!xfs_iflags_test(ip, XFS_IREMAPPING)) + return 0; + + xfs_iunlock(ip, *lock_mode); + *lock_mode = XFS_IOLOCK_EXCL; + return xfs_ilock_iocb(iocb, *lock_mode); +} + +static unsigned int +xfs_ilock_for_write_fault( + struct xfs_inode *ip) +{ + /* get a shared lock if no remapping in progress */ + xfs_ilock(ip, XFS_MMAPLOCK_SHARED); + if (!xfs_iflags_test(ip, XFS_IREMAPPING)) + return XFS_MMAPLOCK_SHARED; + + /* wait for remapping to complete */ + xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); + xfs_ilock(ip, XFS_MMAPLOCK_EXCL); + return XFS_MMAPLOCK_EXCL; +} + STATIC ssize_t xfs_file_dio_read( struct kiocb *iocb, @@ -551,7 +588,7 @@ xfs_file_dio_write_aligned( unsigned int iolock = XFS_IOLOCK_SHARED; ssize_t ret; - ret = xfs_ilock_iocb(iocb, iolock); + ret = xfs_ilock_iocb_for_write(iocb, &iolock); if (ret) return ret; ret = xfs_file_write_checks(iocb, from, &iolock); @@ -618,7 +655,7 @@ retry_exclusive: flags = IOMAP_DIO_FORCE_WAIT; } - ret = xfs_ilock_iocb(iocb, iolock); + ret = xfs_ilock_iocb_for_write(iocb, &iolock); if (ret) return ret; @@ -1180,7 +1217,7 @@ xfs_file_remap_range( if (xfs_file_sync_writes(file_in) || xfs_file_sync_writes(file_out)) xfs_log_force_inode(dest); out_unlock: - xfs_iunlock2_io_mmap(src, dest); + xfs_iunlock2_remapping(src, dest); if (ret) trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); return remapped > 0 ? 
remapped : ret; @@ -1328,6 +1365,7 @@ __xfs_filemap_fault( struct inode *inode = file_inode(vmf->vma->vm_file); struct xfs_inode *ip = XFS_I(inode); vm_fault_t ret; + unsigned int lock_mode = 0; trace_xfs_filemap_fault(ip, order, write_fault); @@ -1336,25 +1374,24 @@ __xfs_filemap_fault( file_update_time(vmf->vma->vm_file); } + if (IS_DAX(inode) || write_fault) + lock_mode = xfs_ilock_for_write_fault(XFS_I(inode)); + if (IS_DAX(inode)) { pfn_t pfn; - xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); ret = xfs_dax_fault(vmf, order, write_fault, &pfn); if (ret & VM_FAULT_NEEDDSYNC) ret = dax_finish_sync_fault(vmf, order, pfn); - xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); + } else if (write_fault) { + ret = iomap_page_mkwrite(vmf, &xfs_page_mkwrite_iomap_ops); } else { - if (write_fault) { - xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); - ret = iomap_page_mkwrite(vmf, - &xfs_page_mkwrite_iomap_ops); - xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); - } else { - ret = filemap_fault(vmf); - } + ret = filemap_fault(vmf); } + if (lock_mode) + xfs_iunlock(XFS_I(inode), lock_mode); + if (write_fault) sb_end_pagefault(inode->i_sb); return ret; diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 736e5545f584..5a72217f5feb 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -23,7 +23,7 @@ #include "xfs_refcount.h" #include "xfs_refcount_btree.h" #include "xfs_alloc_btree.h" -#include "xfs_rtalloc.h" +#include "xfs_rtbitmap.h" #include "xfs_ag.h" /* Convert an xfs_fsmap to an fsmap. */ @@ -483,11 +483,11 @@ xfs_getfsmap_rtdev_rtbitmap_helper( xfs_rtblock_t rtbno; xfs_daddr_t rec_daddr, len_daddr; - rtbno = rec->ar_startext * mp->m_sb.sb_rextsize; + rtbno = xfs_rtx_to_rtb(mp, rec->ar_startext); rec_daddr = XFS_FSB_TO_BB(mp, rtbno); irec.rm_startblock = rtbno; - rtbno = rec->ar_extcount * mp->m_sb.sb_rextsize; + rtbno = xfs_rtx_to_rtb(mp, rec->ar_extcount); len_daddr = XFS_FSB_TO_BB(mp, rtbno); irec.rm_blockcount = rtbno; @@ -514,7 +514,7 @@ xfs_getfsmap_rtdev_rtbitmap( uint64_t eofs; int error; - eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rextents * mp->m_sb.sb_rextsize); + eofs = XFS_FSB_TO_BB(mp, xfs_rtx_to_rtb(mp, mp->m_sb.sb_rextents)); if (keys[0].fmr_physical >= eofs) return 0; start_rtb = XFS_BB_TO_FSBT(mp, @@ -539,11 +539,8 @@ xfs_getfsmap_rtdev_rtbitmap( * Set up query parameters to return free rtextents covering the range * we want. */ - alow.ar_startext = start_rtb; - ahigh.ar_startext = end_rtb; - do_div(alow.ar_startext, mp->m_sb.sb_rextsize); - if (do_div(ahigh.ar_startext, mp->m_sb.sb_rextsize)) - ahigh.ar_startext++; + alow.ar_startext = xfs_rtb_to_rtx(mp, start_rtb); + ahigh.ar_startext = xfs_rtb_to_rtxup(mp, end_rtb); error = xfs_rtalloc_query_range(mp, tp, &alow, &ahigh, xfs_getfsmap_rtdev_rtbitmap_helper, info); if (error) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 36f5cf802c07..c0f1c89786c2 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -918,6 +918,13 @@ xfs_droplink( xfs_trans_t *tp, xfs_inode_t *ip) { + if (VFS_I(ip)->i_nlink == 0) { + xfs_alert(ip->i_mount, + "%s: Attempt to drop inode (%llu) with nlink zero.", + __func__, ip->i_ino); + return -EFSCORRUPTED; + } + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); drop_nlink(VFS_I(ip)); @@ -3621,6 +3628,23 @@ xfs_iunlock2_io_mmap( inode_unlock(VFS_I(ip1)); } +/* Drop the MMAPLOCK and the IOLOCK after a remap completes. 
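Clear XFS_IREMAPPING before dropping the locks so that writers and page faults waiting on the exclusive locks see the remap as finished once they are granted.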
*/ +void +xfs_iunlock2_remapping( + struct xfs_inode *ip1, + struct xfs_inode *ip2) +{ + xfs_iflags_clear(ip1, XFS_IREMAPPING); + + if (ip1 != ip2) + xfs_iunlock(ip1, XFS_MMAPLOCK_SHARED); + xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL); + + if (ip1 != ip2) + inode_unlock_shared(VFS_I(ip1)); + inode_unlock(VFS_I(ip2)); +} + /* * Reload the incore inode list for this inode. Caller should ensure that * the link count cannot change, either by taking ILOCK_SHARED or otherwise diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 0c5bdb91152e..3dc47937da5d 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -347,6 +347,14 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip) /* Quotacheck is running but inode has not been added to quota counts. */ #define XFS_IQUOTAUNCHECKED (1 << 14) +/* + * Remap in progress. Callers that wish to update file data while + * holding a shared IOLOCK or MMAPLOCK must drop the lock and retake + * the lock in exclusive mode. Relocking the file will block until + * IREMAPPING is cleared. + */ +#define XFS_IREMAPPING (1U << 15) + /* All inode state flags related to inode reclaim. */ #define XFS_ALL_IRECLAIM_FLAGS (XFS_IRECLAIMABLE | \ XFS_IRECLAIM | \ @@ -595,6 +603,7 @@ void xfs_end_io(struct work_struct *work); int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); +void xfs_iunlock2_remapping(struct xfs_inode *ip1, struct xfs_inode *ip2); static inline bool xfs_inode_unlinked_incomplete( diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 17c51804f9c6..cd7803fda8b1 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -19,6 +19,7 @@ #include "xfs_log.h" #include "xfs_log_priv.h" #include "xfs_error.h" +#include "xfs_rtbitmap.h" #include <linux/iversion.h> @@ -107,7 +108,7 @@ xfs_inode_item_precommit( */ if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) && (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) && - (ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) { + xfs_extlen_to_rtxmod(ip->i_mount, ip->i_extsize) > 0) { ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE | XFS_DIFLAG_EXTSZINHERIT); ip->i_extsize = 0; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 55bb01173cde..a82470e027f7 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -38,6 +38,7 @@ #include "xfs_reflink.h" #include "xfs_ioctl.h" #include "xfs_xattr.h" +#include "xfs_rtbitmap.h" #include <linux/mount.h> #include <linux/namei.h> @@ -1004,7 +1005,7 @@ xfs_fill_fsxattr( * later. */ if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) && - ip->i_extsize % mp->m_sb.sb_rextsize > 0) { + xfs_extlen_to_rtxmod(mp, ip->i_extsize) > 0) { fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT); fa->fsx_extsize = 0; @@ -1130,7 +1131,7 @@ xfs_ioctl_setattr_xflags( /* If realtime flag is set then must have realtime device */ if (fa->fsx_xflags & FS_XFLAG_REALTIME) { if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 || - (ip->i_extsize % mp->m_sb.sb_rextsize)) + xfs_extlen_to_rtxmod(mp, ip->i_extsize)) return -EINVAL; } diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index e9d317a3dafe..d7873e0360f0 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -198,6 +198,18 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y) return x; } +/* If @b is a power of 2, return log2(b). Else return -1. */ +static inline int8_t log2_if_power2(unsigned long b) +{ + return is_power_of_2(b) ? 
ilog2(b) : -1; +} + +/* If @b is a power of 2, return a mask of the lower bits, else return zero. */ +static inline unsigned long long mask64_if_power2(unsigned long b) +{ + return is_power_of_2(b) ? b - 1 : 0; +} + int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count, char *data, enum req_op op); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 219681d29fbc..503fe3c7edbf 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -101,9 +101,9 @@ typedef struct xfs_mount { /* * Optional cache of rt summary level per bitmap block with the - * invariant that m_rsum_cache[bbno] <= the minimum i for which - * rsum[i][bbno] != 0. Reads and writes are serialized by the rsumip - * inode lock. + * invariant that m_rsum_cache[bbno] > the maximum i for which + * rsum[i][bbno] != 0, or 0 if rsum[i][bbno] == 0 for all i. + * Reads and writes are serialized by the rsumip inode lock. */ uint8_t *m_rsum_cache; struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ @@ -119,6 +119,7 @@ typedef struct xfs_mount { uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ uint8_t m_agno_log; /* log #ag's */ uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ + int8_t m_rtxblklog; /* log2 of rextsize, if possible */ uint m_blockmask; /* sb_blocksize-1 */ uint m_blockwsize; /* sb_blocksize in words */ uint m_blockwmask; /* blockwsize-1 */ @@ -152,6 +153,7 @@ typedef struct xfs_mount { uint64_t m_features; /* active filesystem features */ uint64_t m_low_space[XFS_LOWSP_MAX]; uint64_t m_low_rtexts[XFS_LOWSP_MAX]; + uint64_t m_rtxblkmask; /* rt extent block mask */ struct xfs_ino_geometry m_ino_geo; /* inode geometry */ struct xfs_trans_resv m_resv; /* precomputed res values */ /* low free space thresholds */ diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h index c4cc99b70dd3..21a7e350b4c5 100644 --- a/fs/xfs/xfs_ondisk.h +++ b/fs/xfs/xfs_ondisk.h @@ -72,6 +72,10 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_map_t, 4); XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_name_local_t, 4); + /* realtime structures */ + XFS_CHECK_STRUCT_SIZE(union xfs_rtword_raw, 4); + XFS_CHECK_STRUCT_SIZE(union xfs_suminfo_raw, 4); + /* * m68k has problems with xfs_attr_leaf_name_remote_t, but we pad it to * 4 bytes anyway so it's not obviously a problem. 
Hence for the moment diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index eb9102453aff..658edee8381d 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1540,6 +1540,10 @@ xfs_reflink_remap_prep( if (ret) goto out_unlock; + xfs_iflags_set(src, XFS_IREMAPPING); + if (inode_in != inode_out) + xfs_ilock_demote(src, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL); + return 0; out_unlock: xfs_iunlock2_io_mmap(src, dest); diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 2e1a4e5cd03d..88c48de5c9c8 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -19,6 +19,7 @@ #include "xfs_icache.h" #include "xfs_rtalloc.h" #include "xfs_sb.h" +#include "xfs_rtbitmap.h" /* * Read and return the summary information for a given extent size, @@ -28,48 +29,48 @@ */ static int xfs_rtget_summary( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - int log, /* log2 of extent size */ - xfs_rtblock_t bbno, /* bitmap block number */ - struct xfs_buf **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb, /* in/out: summary block number */ - xfs_suminfo_t *sum) /* out: summary info for this block */ + struct xfs_rtalloc_args *args, + int log, /* log2 of extent size */ + xfs_fileoff_t bbno, /* bitmap block number */ + xfs_suminfo_t *sum) /* out: summary info for this block */ { - return xfs_rtmodify_summary_int(mp, tp, log, bbno, 0, rbpp, rsb, sum); + return xfs_rtmodify_summary_int(args, log, bbno, 0, sum); } /* * Return whether there are any free extents in the size range given * by low and high, for the bitmap block bbno. */ -STATIC int /* error */ +STATIC int xfs_rtany_summary( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - int low, /* low log2 extent size */ - int high, /* high log2 extent size */ - xfs_rtblock_t bbno, /* bitmap block number */ - struct xfs_buf **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb, /* in/out: summary block number */ - int *stat) /* out: any good extents here? */ + struct xfs_rtalloc_args *args, + int low, /* low log2 extent size */ + int high, /* high log2 extent size */ + xfs_fileoff_t bbno, /* bitmap block number */ + int *maxlog) /* out: max log2 extent size free */ { - int error; /* error value */ - int log; /* loop counter, log2 of ext. size */ - xfs_suminfo_t sum; /* summary data */ - - /* There are no extents at levels < m_rsum_cache[bbno]. */ - if (mp->m_rsum_cache && low < mp->m_rsum_cache[bbno]) - low = mp->m_rsum_cache[bbno]; + struct xfs_mount *mp = args->mp; + int error; + int log; /* loop counter, log2 of ext. size */ + xfs_suminfo_t sum; /* summary data */ + + /* There are no extents at levels >= m_rsum_cache[bbno]. */ + if (mp->m_rsum_cache) { + high = min(high, mp->m_rsum_cache[bbno] - 1); + if (low > high) { + *maxlog = -1; + return 0; + } + } /* * Loop over logs of extent sizes. */ - for (log = low; log <= high; log++) { + for (log = high; log >= low; log--) { /* * Get one summary datum. */ - error = xfs_rtget_summary(mp, tp, log, bbno, rbpp, rsb, &sum); + error = xfs_rtget_summary(args, log, bbno, &sum); if (error) { return error; } @@ -77,18 +78,18 @@ xfs_rtany_summary( * If there are any, return success. */ if (sum) { - *stat = 1; + *maxlog = log; goto out; } } /* * Found nothing, return failure. */ - *stat = 0; + *maxlog = -1; out: - /* There were no extents at levels < log. 
*/ - if (mp->m_rsum_cache && log > mp->m_rsum_cache[bbno]) - mp->m_rsum_cache[bbno] = log; + /* There were no extents at levels > log. */ + if (mp->m_rsum_cache && log + 1 < mp->m_rsum_cache[bbno]) + mp->m_rsum_cache[bbno] = log + 1; return 0; } @@ -97,60 +98,54 @@ out: * Copy and transform the summary file, given the old and new * parameters in the mount structures. */ -STATIC int /* error */ +STATIC int xfs_rtcopy_summary( - xfs_mount_t *omp, /* old file system mount point */ - xfs_mount_t *nmp, /* new file system mount point */ - xfs_trans_t *tp) /* transaction pointer */ + struct xfs_rtalloc_args *oargs, + struct xfs_rtalloc_args *nargs) { - xfs_rtblock_t bbno; /* bitmap block number */ - struct xfs_buf *bp; /* summary buffer */ - int error; /* error return value */ - int log; /* summary level number (log length) */ - xfs_suminfo_t sum; /* summary data */ - xfs_fsblock_t sumbno; /* summary block number */ + xfs_fileoff_t bbno; /* bitmap block number */ + int error; + int log; /* summary level number (log length) */ + xfs_suminfo_t sum; /* summary data */ - bp = NULL; - for (log = omp->m_rsumlevels - 1; log >= 0; log--) { - for (bbno = omp->m_sb.sb_rbmblocks - 1; + for (log = oargs->mp->m_rsumlevels - 1; log >= 0; log--) { + for (bbno = oargs->mp->m_sb.sb_rbmblocks - 1; (xfs_srtblock_t)bbno >= 0; bbno--) { - error = xfs_rtget_summary(omp, tp, log, bbno, &bp, - &sumbno, &sum); + error = xfs_rtget_summary(oargs, log, bbno, &sum); if (error) - return error; + goto out; if (sum == 0) continue; - error = xfs_rtmodify_summary(omp, tp, log, bbno, -sum, - &bp, &sumbno); + error = xfs_rtmodify_summary(oargs, log, bbno, -sum); if (error) - return error; - error = xfs_rtmodify_summary(nmp, tp, log, bbno, sum, - &bp, &sumbno); + goto out; + error = xfs_rtmodify_summary(nargs, log, bbno, sum); if (error) - return error; + goto out; ASSERT(sum > 0); } } + error = 0; +out: + xfs_rtbuf_cache_relse(oargs); return 0; } /* * Mark an extent specified by start and len allocated. * Updates all the summary information as well as the bitmap. */ -STATIC int /* error */ +STATIC int xfs_rtallocate_range( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* start block to allocate */ - xfs_extlen_t len, /* length to allocate */ - struct xfs_buf **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb) /* in/out: summary block number */ + struct xfs_rtalloc_args *args, + xfs_rtxnum_t start, /* start rtext to allocate */ + xfs_rtxlen_t len) /* in/out: summary block number */ { - xfs_rtblock_t end; /* end of the allocated extent */ - int error; /* error value */ - xfs_rtblock_t postblock = 0; /* first block allocated > end */ - xfs_rtblock_t preblock = 0; /* first block allocated < start */ + struct xfs_mount *mp = args->mp; + xfs_rtxnum_t end; /* end of the allocated rtext */ + int error; + xfs_rtxnum_t postblock = 0; /* first rtext allocated > end */ + xfs_rtxnum_t preblock = 0; /* first rtext allocated < start */ end = start + len - 1; /* @@ -158,15 +153,15 @@ xfs_rtallocate_range( * We need to find the beginning and end of the extent so we can * properly update the summary. */ - error = xfs_rtfind_back(mp, tp, start, 0, &preblock); + error = xfs_rtfind_back(args, start, 0, &preblock); if (error) { return error; } /* * Find the next allocated block (end of free extent). 
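preblock and postblock then delimit the whole old free extent, whose single summary counter is decremented before the unallocated pieces outside [start, end] are counted back in.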
*/ - error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, - &postblock); + error = xfs_rtfind_forw(args, end, mp->m_sb.sb_rextents - 1, + &postblock); if (error) { return error; } @@ -174,9 +169,9 @@ xfs_rtallocate_range( * Decrement the summary information corresponding to the entire * (old) free extent. */ - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(postblock + 1 - preblock), - XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb); + error = xfs_rtmodify_summary(args, + XFS_RTBLOCKLOG(postblock + 1 - preblock), + xfs_rtx_to_rbmblock(mp, preblock), -1); if (error) { return error; } @@ -185,9 +180,9 @@ xfs_rtallocate_range( * old extent, add summary data for them to be free. */ if (preblock < start) { - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(start - preblock), - XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb); + error = xfs_rtmodify_summary(args, + XFS_RTBLOCKLOG(start - preblock), + xfs_rtx_to_rbmblock(mp, preblock), 1); if (error) { return error; } @@ -197,9 +192,9 @@ xfs_rtallocate_range( * old extent, add summary data for them to be free. */ if (postblock > end) { - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(postblock - end), - XFS_BITTOBLOCK(mp, end + 1), 1, rbpp, rsb); + error = xfs_rtmodify_summary(args, + XFS_RTBLOCKLOG(postblock - end), + xfs_rtx_to_rbmblock(mp, end + 1), 1); if (error) { return error; } @@ -207,54 +202,69 @@ xfs_rtallocate_range( /* * Modify the bitmap to mark this extent allocated. */ - error = xfs_rtmodify_range(mp, tp, start, len, 0); + error = xfs_rtmodify_range(args, start, len, 0); return error; } /* + * Make sure we don't run off the end of the rt volume. Be careful that + * adjusting maxlen downwards doesn't cause us to fail the alignment checks. + */ +static inline xfs_rtxlen_t +xfs_rtallocate_clamp_len( + struct xfs_mount *mp, + xfs_rtxnum_t startrtx, + xfs_rtxlen_t rtxlen, + xfs_rtxlen_t prod) +{ + xfs_rtxlen_t ret; + + ret = min(mp->m_sb.sb_rextents, startrtx + rtxlen) - startrtx; + return rounddown(ret, prod); +} + +/* * Attempt to allocate an extent minlen<=len<=maxlen starting from * bitmap block bbno. If we don't get maxlen then use prod to trim - * the length, if given. Returns error; returns starting block in *rtblock. + * the length, if given. Returns error; returns starting block in *rtx. * The lengths are all in rtextents. 
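On failure, *rtx is set to NULLRTEXTNO and *nextp points at the next rtextent worth trying so the caller can skip ahead.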
*/ -STATIC int /* error */ +STATIC int xfs_rtallocate_extent_block( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t bbno, /* bitmap block number */ - xfs_extlen_t minlen, /* minimum length to allocate */ - xfs_extlen_t maxlen, /* maximum length to allocate */ - xfs_extlen_t *len, /* out: actual length allocated */ - xfs_rtblock_t *nextp, /* out: next block to try */ - struct xfs_buf **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb, /* in/out: summary block number */ - xfs_extlen_t prod, /* extent product factor */ - xfs_rtblock_t *rtblock) /* out: start block allocated */ + struct xfs_rtalloc_args *args, + xfs_fileoff_t bbno, /* bitmap block number */ + xfs_rtxlen_t minlen, /* minimum length to allocate */ + xfs_rtxlen_t maxlen, /* maximum length to allocate */ + xfs_rtxlen_t *len, /* out: actual length allocated */ + xfs_rtxnum_t *nextp, /* out: next rtext to try */ + xfs_rtxlen_t prod, /* extent product factor */ + xfs_rtxnum_t *rtx) /* out: start rtext allocated */ { - xfs_rtblock_t besti; /* best rtblock found so far */ - xfs_rtblock_t bestlen; /* best length found so far */ - xfs_rtblock_t end; /* last rtblock in chunk */ - int error; /* error value */ - xfs_rtblock_t i; /* current rtblock trying */ - xfs_rtblock_t next; /* next rtblock to try */ - int stat; /* status from internal calls */ + struct xfs_mount *mp = args->mp; + xfs_rtxnum_t besti; /* best rtext found so far */ + xfs_rtxnum_t bestlen;/* best length found so far */ + xfs_rtxnum_t end; /* last rtext in chunk */ + int error; + xfs_rtxnum_t i; /* current rtext trying */ + xfs_rtxnum_t next; /* next rtext to try */ + int stat; /* status from internal calls */ /* * Loop over all the extents starting in this bitmap block, * looking for one that's long enough. */ - for (i = XFS_BLOCKTOBIT(mp, bbno), besti = -1, bestlen = 0, - end = XFS_BLOCKTOBIT(mp, bbno + 1) - 1; + for (i = xfs_rbmblock_to_rtx(mp, bbno), besti = -1, bestlen = 0, + end = xfs_rbmblock_to_rtx(mp, bbno + 1) - 1; i <= end; i++) { /* Make sure we don't scan off the end of the rt volume. */ - maxlen = min(mp->m_sb.sb_rextents, i + maxlen) - i; + maxlen = xfs_rtallocate_clamp_len(mp, i, maxlen, prod); /* * See if there's a free extent of maxlen starting at i. * If it's not so then next will contain the first non-free. */ - error = xfs_rtcheck_range(mp, tp, i, maxlen, 1, &next, &stat); + error = xfs_rtcheck_range(args, i, maxlen, 1, &next, &stat); if (error) { return error; } @@ -262,13 +272,12 @@ xfs_rtallocate_extent_block( /* * i for maxlen is all free, allocate and return that. */ - error = xfs_rtallocate_range(mp, tp, i, maxlen, rbpp, - rsb); + error = xfs_rtallocate_range(args, i, maxlen); if (error) { return error; } *len = maxlen; - *rtblock = i; + *rtx = i; return 0; } /* @@ -278,7 +287,7 @@ xfs_rtallocate_extent_block( * so far, remember it. */ if (minlen < maxlen) { - xfs_rtblock_t thislen; /* this extent size */ + xfs_rtxnum_t thislen; /* this extent size */ thislen = next - i; if (thislen >= minlen && thislen > bestlen) { @@ -290,7 +299,7 @@ xfs_rtallocate_extent_block( * If not done yet, find the start of the next free space. */ if (next < end) { - error = xfs_rtfind_forw(mp, tp, next, end, &i); + error = xfs_rtfind_forw(args, next, end, &i); if (error) { return error; } @@ -301,7 +310,7 @@ xfs_rtallocate_extent_block( * Searched the whole thing & didn't find a maxlen free extent. 
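Fall back to the best shorter candidate remembered during the scan, trimming its length to a multiple of prod if one was given.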
*/ if (minlen < maxlen && besti != -1) { - xfs_extlen_t p; /* amount to trim length by */ + xfs_rtxlen_t p; /* amount to trim length by */ /* * If size should be a multiple of prod, make that so. @@ -315,51 +324,49 @@ xfs_rtallocate_extent_block( /* * Allocate besti for bestlen & return that. */ - error = xfs_rtallocate_range(mp, tp, besti, bestlen, rbpp, rsb); + error = xfs_rtallocate_range(args, besti, bestlen); if (error) { return error; } *len = bestlen; - *rtblock = besti; + *rtx = besti; return 0; } /* * Allocation failed. Set *nextp to the next block to try. */ *nextp = next; - *rtblock = NULLRTBLOCK; + *rtx = NULLRTEXTNO; return 0; } /* * Allocate an extent of length minlen<=len<=maxlen, starting at block * bno. If we don't get maxlen then use prod to trim the length, if given. - * Returns error; returns starting block in *rtblock. + * Returns error; returns starting block in *rtx. * The lengths are all in rtextents. */ -STATIC int /* error */ +STATIC int xfs_rtallocate_extent_exact( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t bno, /* starting block number to allocate */ - xfs_extlen_t minlen, /* minimum length to allocate */ - xfs_extlen_t maxlen, /* maximum length to allocate */ - xfs_extlen_t *len, /* out: actual length allocated */ - struct xfs_buf **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb, /* in/out: summary block number */ - xfs_extlen_t prod, /* extent product factor */ - xfs_rtblock_t *rtblock) /* out: start block allocated */ + struct xfs_rtalloc_args *args, + xfs_rtxnum_t start, /* starting rtext number to allocate */ + xfs_rtxlen_t minlen, /* minimum length to allocate */ + xfs_rtxlen_t maxlen, /* maximum length to allocate */ + xfs_rtxlen_t *len, /* out: actual length allocated */ + xfs_rtxlen_t prod, /* extent product factor */ + xfs_rtxnum_t *rtx) /* out: start rtext allocated */ { - int error; /* error value */ - xfs_extlen_t i; /* extent length trimmed due to prod */ - int isfree; /* extent is free */ - xfs_rtblock_t next; /* next block to try (dummy) */ + int error; + xfs_rtxlen_t i; /* extent length trimmed due to prod */ + int isfree; /* extent is free */ + xfs_rtxnum_t next; /* next rtext to try (dummy) */ - ASSERT(minlen % prod == 0 && maxlen % prod == 0); + ASSERT(minlen % prod == 0); + ASSERT(maxlen % prod == 0); /* * Check if the range in question (for maxlen) is free. */ - error = xfs_rtcheck_range(mp, tp, bno, maxlen, 1, &next, &isfree); + error = xfs_rtcheck_range(args, start, maxlen, 1, &next, &isfree); if (error) { return error; } @@ -367,23 +374,23 @@ xfs_rtallocate_extent_exact( /* * If it is, allocate it and return success. */ - error = xfs_rtallocate_range(mp, tp, bno, maxlen, rbpp, rsb); + error = xfs_rtallocate_range(args, start, maxlen); if (error) { return error; } *len = maxlen; - *rtblock = bno; + *rtx = start; return 0; } /* * If not, allocate what there is, if it's at least minlen. */ - maxlen = next - bno; + maxlen = next - start; if (maxlen < minlen) { /* * Failed, return failure status. */ - *rtblock = NULLRTBLOCK; + *rtx = NULLRTEXTNO; return 0; } /* @@ -395,81 +402,82 @@ xfs_rtallocate_extent_exact( /* * Now we can't do it, return failure status. */ - *rtblock = NULLRTBLOCK; + *rtx = NULLRTEXTNO; return 0; } } /* * Allocate what we can and return it. 
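maxlen was already cut back to the free space actually found and checked against minlen, so the shorter allocation still satisfies the caller.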
*/ - error = xfs_rtallocate_range(mp, tp, bno, maxlen, rbpp, rsb); + error = xfs_rtallocate_range(args, start, maxlen); if (error) { return error; } *len = maxlen; - *rtblock = bno; + *rtx = start; return 0; } /* * Allocate an extent of length minlen<=len<=maxlen, starting as near - * to bno as possible. If we don't get maxlen then use prod to trim + * to start as possible. If we don't get maxlen then use prod to trim * the length, if given. The lengths are all in rtextents. */ -STATIC int /* error */ +STATIC int xfs_rtallocate_extent_near( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t bno, /* starting block number to allocate */ - xfs_extlen_t minlen, /* minimum length to allocate */ - xfs_extlen_t maxlen, /* maximum length to allocate */ - xfs_extlen_t *len, /* out: actual length allocated */ - struct xfs_buf **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb, /* in/out: summary block number */ - xfs_extlen_t prod, /* extent product factor */ - xfs_rtblock_t *rtblock) /* out: start block allocated */ + struct xfs_rtalloc_args *args, + xfs_rtxnum_t start, /* starting rtext number to allocate */ + xfs_rtxlen_t minlen, /* minimum length to allocate */ + xfs_rtxlen_t maxlen, /* maximum length to allocate */ + xfs_rtxlen_t *len, /* out: actual length allocated */ + xfs_rtxlen_t prod, /* extent product factor */ + xfs_rtxnum_t *rtx) /* out: start rtext allocated */ { - int any; /* any useful extents from summary */ - xfs_rtblock_t bbno; /* bitmap block number */ - int error; /* error value */ - int i; /* bitmap block offset (loop control) */ - int j; /* secondary loop control */ - int log2len; /* log2 of minlen */ - xfs_rtblock_t n; /* next block to try */ - xfs_rtblock_t r; /* result block */ - - ASSERT(minlen % prod == 0 && maxlen % prod == 0); + struct xfs_mount *mp = args->mp; + int maxlog; /* max useful extent from summary */ + xfs_fileoff_t bbno; /* bitmap block number */ + int error; + int i; /* bitmap block offset (loop control) */ + int j; /* secondary loop control */ + int log2len; /* log2 of minlen */ + xfs_rtxnum_t n; /* next rtext to try */ + xfs_rtxnum_t r; /* result rtext */ + + ASSERT(minlen % prod == 0); + ASSERT(maxlen % prod == 0); + /* * If the block number given is off the end, silently set it to * the last block. */ - if (bno >= mp->m_sb.sb_rextents) - bno = mp->m_sb.sb_rextents - 1; + if (start >= mp->m_sb.sb_rextents) + start = mp->m_sb.sb_rextents - 1; /* Make sure we don't run off the end of the rt volume. */ - maxlen = min(mp->m_sb.sb_rextents, bno + maxlen) - bno; + maxlen = xfs_rtallocate_clamp_len(mp, start, maxlen, prod); if (maxlen < minlen) { - *rtblock = NULLRTBLOCK; + *rtx = NULLRTEXTNO; return 0; } /* * Try the exact allocation first. */ - error = xfs_rtallocate_extent_exact(mp, tp, bno, minlen, maxlen, len, - rbpp, rsb, prod, &r); + error = xfs_rtallocate_extent_exact(args, start, minlen, maxlen, len, + prod, &r); if (error) { return error; } /* * If the exact allocation worked, return that. */ - if (r != NULLRTBLOCK) { - *rtblock = r; + if (r != NULLRTEXTNO) { + *rtx = r; return 0; } - bbno = XFS_BITTOBLOCK(mp, bno); + bbno = xfs_rtx_to_rbmblock(mp, start); i = 0; + j = -1; ASSERT(minlen != 0); log2len = xfs_highbit32(minlen); /* @@ -480,8 +488,8 @@ xfs_rtallocate_extent_near( * Get summary information of extents of all useful levels * starting in this bitmap block. 
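maxlog reports the largest log2 size class with any free extents in this bitmap block, or -1 if it has none at all.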
*/ - error = xfs_rtany_summary(mp, tp, log2len, mp->m_rsumlevels - 1, - bbno + i, rbpp, rsb, &any); + error = xfs_rtany_summary(args, log2len, mp->m_rsumlevels - 1, + bbno + i, &maxlog); if (error) { return error; } @@ -489,7 +497,10 @@ xfs_rtallocate_extent_near( * If there are any useful extents starting here, try * allocating one. */ - if (any) { + if (maxlog >= 0) { + xfs_extlen_t maxavail = + min_t(xfs_rtblock_t, maxlen, + (1ULL << (maxlog + 1)) - 1); /* * On the positive side of the starting location. */ @@ -498,17 +509,17 @@ xfs_rtallocate_extent_near( * Try to allocate an extent starting in * this block. */ - error = xfs_rtallocate_extent_block(mp, tp, - bbno + i, minlen, maxlen, len, &n, rbpp, - rsb, prod, &r); + error = xfs_rtallocate_extent_block(args, + bbno + i, minlen, maxavail, len, + &n, prod, &r); if (error) { return error; } /* * If it worked, return it. */ - if (r != NULLRTBLOCK) { - *rtblock = r; + if (r != NULLRTEXTNO) { + *rtx = r; return 0; } } @@ -516,68 +527,46 @@ xfs_rtallocate_extent_near( * On the negative side of the starting location. */ else { /* i < 0 */ + int maxblocks; + /* - * Loop backwards through the bitmap blocks from - * the starting point-1 up to where we are now. - * There should be an extent which ends in this - * bitmap block and is long enough. + * Loop backwards to find the end of the extent + * we found in the realtime summary. + * + * maxblocks is the maximum possible number of + * bitmap blocks from the start of the extent + * to the end of the extent. */ - for (j = -1; j > i; j--) { - /* - * Grab the summary information for - * this bitmap block. - */ - error = xfs_rtany_summary(mp, tp, - log2len, mp->m_rsumlevels - 1, - bbno + j, rbpp, rsb, &any); - if (error) { - return error; - } - /* - * If there's no extent given in the - * summary that means the extent we - * found must carry over from an - * earlier block. If there is an - * extent given, we've already tried - * that allocation, don't do it again. - */ - if (any) - continue; - error = xfs_rtallocate_extent_block(mp, - tp, bbno + j, minlen, maxlen, - len, &n, rbpp, rsb, prod, &r); + if (maxlog == 0) + maxblocks = 0; + else if (maxlog < mp->m_blkbit_log) + maxblocks = 1; + else + maxblocks = 2 << (maxlog - mp->m_blkbit_log); + + /* + * We need to check bbno + i + maxblocks down to + * bbno + i. We already checked bbno down to + * bbno + j + 1, so we don't need to check those + * again. + */ + j = min(i + maxblocks, j); + for (; j >= i; j--) { + error = xfs_rtallocate_extent_block(args, + bbno + j, minlen, + maxavail, len, &n, prod, + &r); if (error) { return error; } /* * If it works, return the extent. */ - if (r != NULLRTBLOCK) { - *rtblock = r; + if (r != NULLRTEXTNO) { + *rtx = r; return 0; } } - /* - * There weren't intervening bitmap blocks - * with a long enough extent, or the - * allocation didn't work for some reason - * (i.e. it's a little * too short). - * Try to allocate from the summary block - * that we found. - */ - error = xfs_rtallocate_extent_block(mp, tp, - bbno + i, minlen, maxlen, len, &n, rbpp, - rsb, prod, &r); - if (error) { - return error; - } - /* - * If it works, return the extent. - */ - if (r != NULLRTBLOCK) { - *rtblock = r; - return 0; - } } } /* @@ -610,7 +599,7 @@ xfs_rtallocate_extent_near( else break; } - *rtblock = NULLRTBLOCK; + *rtx = NULLRTEXTNO; return 0; } @@ -619,26 +608,25 @@ xfs_rtallocate_extent_near( * specified. If we don't get maxlen then use prod to trim * the length, if given. The lengths are all in rtextents. 
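The first pass below looks only for maxlen-sized extents; if that turns up nothing, a second pass walks the smaller summary levels accepting any length from minlen to maxlen - 1.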
*/ -STATIC int /* error */ +STATIC int xfs_rtallocate_extent_size( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_extlen_t minlen, /* minimum length to allocate */ - xfs_extlen_t maxlen, /* maximum length to allocate */ - xfs_extlen_t *len, /* out: actual length allocated */ - struct xfs_buf **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb, /* in/out: summary block number */ - xfs_extlen_t prod, /* extent product factor */ - xfs_rtblock_t *rtblock) /* out: start block allocated */ + struct xfs_rtalloc_args *args, + xfs_rtxlen_t minlen, /* minimum length to allocate */ + xfs_rtxlen_t maxlen, /* maximum length to allocate */ + xfs_rtxlen_t *len, /* out: actual length allocated */ + xfs_rtxlen_t prod, /* extent product factor */ + xfs_rtxnum_t *rtx) /* out: start rtext allocated */ { - int error; /* error value */ - int i; /* bitmap block number */ - int l; /* level number (loop control) */ - xfs_rtblock_t n; /* next block to be tried */ - xfs_rtblock_t r; /* result block number */ - xfs_suminfo_t sum; /* summary information for extents */ - - ASSERT(minlen % prod == 0 && maxlen % prod == 0); + struct xfs_mount *mp = args->mp; + int error; + xfs_fileoff_t i; /* bitmap block number */ + int l; /* level number (loop control) */ + xfs_rtxnum_t n; /* next rtext to be tried */ + xfs_rtxnum_t r; /* result rtext number */ + xfs_suminfo_t sum; /* summary information for extents */ + + ASSERT(minlen % prod == 0); + ASSERT(maxlen % prod == 0); ASSERT(maxlen != 0); /* @@ -656,8 +644,7 @@ xfs_rtallocate_extent_size( /* * Get the summary for this level/block. */ - error = xfs_rtget_summary(mp, tp, l, i, rbpp, rsb, - &sum); + error = xfs_rtget_summary(args, l, i, &sum); if (error) { return error; } @@ -669,16 +656,16 @@ xfs_rtallocate_extent_size( /* * Try allocating the extent. */ - error = xfs_rtallocate_extent_block(mp, tp, i, maxlen, - maxlen, len, &n, rbpp, rsb, prod, &r); + error = xfs_rtallocate_extent_block(args, i, maxlen, + maxlen, len, &n, prod, &r); if (error) { return error; } /* * If it worked, return that. */ - if (r != NULLRTBLOCK) { - *rtblock = r; + if (r != NULLRTEXTNO) { + *rtx = r; return 0; } /* @@ -686,8 +673,8 @@ xfs_rtallocate_extent_size( * allocator is beyond the next bitmap block, * skip to that bitmap block. */ - if (XFS_BITTOBLOCK(mp, n) > i + 1) - i = XFS_BITTOBLOCK(mp, n) - 1; + if (xfs_rtx_to_rbmblock(mp, n) > i + 1) + i = xfs_rtx_to_rbmblock(mp, n) - 1; } } /* @@ -695,7 +682,7 @@ xfs_rtallocate_extent_size( * we're asking for a fixed size extent. */ if (minlen > --maxlen) { - *rtblock = NULLRTBLOCK; + *rtx = NULLRTEXTNO; return 0; } ASSERT(minlen != 0); @@ -715,8 +702,7 @@ xfs_rtallocate_extent_size( /* * Get the summary information for this level/block. */ - error = xfs_rtget_summary(mp, tp, l, i, rbpp, rsb, - &sum); + error = xfs_rtget_summary(args, l, i, &sum); if (error) { return error; } @@ -730,18 +716,18 @@ xfs_rtallocate_extent_size( * minlen/maxlen are in the possible range for * this summary level. */ - error = xfs_rtallocate_extent_block(mp, tp, i, + error = xfs_rtallocate_extent_block(args, i, XFS_RTMAX(minlen, 1 << l), XFS_RTMIN(maxlen, (1 << (l + 1)) - 1), - len, &n, rbpp, rsb, prod, &r); + len, &n, prod, &r); if (error) { return error; } /* * If it worked, return that extent. 
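The request was clamped to this summary level's size range before the call, so the extent is properly sized.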
*/ - if (r != NULLRTBLOCK) { - *rtblock = r; + if (r != NULLRTEXTNO) { + *rtx = r; return 0; } /* @@ -749,14 +735,14 @@ xfs_rtallocate_extent_size( * allocator is beyond the next bitmap block, * skip to that bitmap block. */ - if (XFS_BITTOBLOCK(mp, n) > i + 1) - i = XFS_BITTOBLOCK(mp, n) - 1; + if (xfs_rtx_to_rbmblock(mp, n) > i + 1) + i = xfs_rtx_to_rbmblock(mp, n) - 1; } } /* * Got nothing, return failure. */ - *rtblock = NULLRTBLOCK; + *rtx = NULLRTEXTNO; return 0; } @@ -886,12 +872,14 @@ xfs_alloc_rsum_cache( xfs_extlen_t rbmblocks) /* number of rt bitmap blocks */ { /* - * The rsum cache is initialized to all zeroes, which is trivially a - * lower bound on the minimum level with any free extents. We can - * continue without the cache if it couldn't be allocated. + * The rsum cache is initialized to the maximum value, which is + * trivially an upper bound on the maximum level with any free extents. + * We can continue without the cache if it couldn't be allocated. */ - mp->m_rsum_cache = kvzalloc(rbmblocks, GFP_KERNEL); - if (!mp->m_rsum_cache) + mp->m_rsum_cache = kvmalloc(rbmblocks, GFP_KERNEL); + if (mp->m_rsum_cache) + memset(mp->m_rsum_cache, -1, rbmblocks); + else xfs_warn(mp, "could not allocate realtime summary cache"); } @@ -907,13 +895,13 @@ xfs_growfs_rt( xfs_mount_t *mp, /* mount point for filesystem */ xfs_growfs_rt_t *in) /* growfs rt input struct */ { - xfs_rtblock_t bmbno; /* bitmap block number */ + xfs_fileoff_t bmbno; /* bitmap block number */ struct xfs_buf *bp; /* temporary buffer */ int error; /* error return value */ xfs_mount_t *nmp; /* new (fake) mount structure */ xfs_rfsblock_t nrblocks; /* new number of realtime blocks */ xfs_extlen_t nrbmblocks; /* new number of rt bitmap blocks */ - xfs_rtblock_t nrextents; /* new number of realtime extents */ + xfs_rtxnum_t nrextents; /* new number of realtime extents */ uint8_t nrextslog; /* new log2 of sb_rextents */ xfs_extlen_t nrsumblocks; /* new number of summary blocks */ uint nrsumlevels; /* new rt summary levels */ @@ -922,7 +910,6 @@ xfs_growfs_rt( xfs_extlen_t rbmblocks; /* current number of rt bitmap blocks */ xfs_extlen_t rsumblocks; /* current number of rt summary blks */ xfs_sb_t *sbp; /* old superblock */ - xfs_fsblock_t sumbno; /* summary block number */ uint8_t *rsum_cache; /* old summary cache */ sbp = &mp->m_sb; @@ -954,7 +941,7 @@ xfs_growfs_rt( return -EINVAL; /* Unsupported realtime features. */ - if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp)) + if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp) || xfs_has_quota(mp)) return -EOPNOTSUPP; nrblocks = in->newblocks; @@ -976,11 +963,10 @@ xfs_growfs_rt( */ nrextents = nrblocks; do_div(nrextents, in->extsize); - nrbmblocks = howmany_64(nrextents, NBBY * sbp->sb_blocksize); + nrbmblocks = xfs_rtbitmap_blockcount(mp, nrextents); nrextslog = xfs_highbit32(nrextents); nrsumlevels = nrextslog + 1; - nrsumsize = (uint)sizeof(xfs_suminfo_t) * nrsumlevels * nrbmblocks; - nrsumblocks = XFS_B_TO_FSB(mp, nrsumsize); + nrsumblocks = xfs_rtsummary_blockcount(mp, nrsumlevels, nrbmblocks); nrsumsize = XFS_FSB_TO_B(mp, nrsumblocks); /* * New summary size can't be more than half the size of @@ -1023,6 +1009,12 @@ xfs_growfs_rt( ((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0); bmbno < nrbmblocks; bmbno++) { + struct xfs_rtalloc_args args = { + .mp = mp, + }; + struct xfs_rtalloc_args nargs = { + .mp = nmp, + }; struct xfs_trans *tp; xfs_rfsblock_t nrblocks_step; @@ -1032,19 +1024,17 @@ xfs_growfs_rt( * Calculate new sb and mount fields for this round. 
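Each pass grows the realtime area by at most one bitmap block's worth of extents, so the fake superblock describes the geometry as of bmbno + 1 bitmap blocks.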
*/ nsbp->sb_rextsize = in->extsize; + nmp->m_rtxblklog = -1; /* don't use shift or masking */ nsbp->sb_rbmblocks = bmbno + 1; nrblocks_step = (bmbno + 1) * NBBY * nsbp->sb_blocksize * nsbp->sb_rextsize; nsbp->sb_rblocks = min(nrblocks, nrblocks_step); - nsbp->sb_rextents = nsbp->sb_rblocks; - do_div(nsbp->sb_rextents, nsbp->sb_rextsize); + nsbp->sb_rextents = xfs_rtb_to_rtx(nmp, nsbp->sb_rblocks); ASSERT(nsbp->sb_rextents != 0); nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents); nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1; - nrsumsize = - (uint)sizeof(xfs_suminfo_t) * nrsumlevels * - nsbp->sb_rbmblocks; - nrsumblocks = XFS_B_TO_FSB(mp, nrsumsize); + nrsumblocks = xfs_rtsummary_blockcount(mp, nrsumlevels, + nsbp->sb_rbmblocks); nmp->m_rsumsize = nrsumsize = XFS_FSB_TO_B(mp, nrsumblocks); /* * Start a transaction, get the log reservation. @@ -1053,6 +1043,9 @@ xfs_growfs_rt( &tp); if (error) break; + args.tp = tp; + nargs.tp = tp; + /* * Lock out other callers by grabbing the bitmap inode lock. */ @@ -1086,7 +1079,7 @@ xfs_growfs_rt( */ if (sbp->sb_rbmblocks != nsbp->sb_rbmblocks || mp->m_rsumlevels != nmp->m_rsumlevels) { - error = xfs_rtcopy_summary(mp, nmp, tp); + error = xfs_rtcopy_summary(&args, &nargs); if (error) goto error_cancel; } @@ -1111,9 +1104,9 @@ xfs_growfs_rt( /* * Free new extent. */ - bp = NULL; - error = xfs_rtfree_range(nmp, tp, sbp->sb_rextents, - nsbp->sb_rextents - sbp->sb_rextents, &bp, &sumbno); + error = xfs_rtfree_range(&nargs, sbp->sb_rextents, + nsbp->sb_rextents - sbp->sb_rextents); + xfs_rtbuf_cache_relse(&nargs); if (error) { error_cancel: xfs_trans_cancel(tp); @@ -1171,59 +1164,60 @@ out_free: * parameters. The length units are all in realtime extents, as is the * result block number. */ -int /* error */ +int xfs_rtallocate_extent( - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t bno, /* starting block number to allocate */ - xfs_extlen_t minlen, /* minimum length to allocate */ - xfs_extlen_t maxlen, /* maximum length to allocate */ - xfs_extlen_t *len, /* out: actual length allocated */ - int wasdel, /* was a delayed allocation extent */ - xfs_extlen_t prod, /* extent product factor */ - xfs_rtblock_t *rtblock) /* out: start block allocated */ + struct xfs_trans *tp, + xfs_rtxnum_t start, /* starting rtext number to allocate */ + xfs_rtxlen_t minlen, /* minimum length to allocate */ + xfs_rtxlen_t maxlen, /* maximum length to allocate */ + xfs_rtxlen_t *len, /* out: actual length allocated */ + int wasdel, /* was a delayed allocation extent */ + xfs_rtxlen_t prod, /* extent product factor */ + xfs_rtxnum_t *rtblock) /* out: start rtext allocated */ { - xfs_mount_t *mp = tp->t_mountp; - int error; /* error value */ - xfs_rtblock_t r; /* result allocated block */ - xfs_fsblock_t sb; /* summary file block number */ - struct xfs_buf *sumbp; /* summary file block buffer */ - - ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); + struct xfs_rtalloc_args args = { + .mp = tp->t_mountp, + .tp = tp, + }; + int error; /* error value */ + xfs_rtxnum_t r; /* result allocated rtext */ + + ASSERT(xfs_isilocked(args.mp->m_rbmip, XFS_ILOCK_EXCL)); ASSERT(minlen > 0 && minlen <= maxlen); /* * If prod is set then figure out what to do to minlen and maxlen. 
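Round maxlen down and minlen up to multiples of prod so that whatever length is finally allocated stays aligned to the extent size hint.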
*/ if (prod > 1) { - xfs_extlen_t i; + xfs_rtxlen_t i; if ((i = maxlen % prod)) maxlen -= i; if ((i = minlen % prod)) minlen += prod - i; if (maxlen < minlen) { - *rtblock = NULLRTBLOCK; + *rtblock = NULLRTEXTNO; return 0; } } retry: - sumbp = NULL; - if (bno == 0) { - error = xfs_rtallocate_extent_size(mp, tp, minlen, maxlen, len, - &sumbp, &sb, prod, &r); + if (start == 0) { + error = xfs_rtallocate_extent_size(&args, minlen, + maxlen, len, prod, &r); } else { - error = xfs_rtallocate_extent_near(mp, tp, bno, minlen, maxlen, - len, &sumbp, &sb, prod, &r); + error = xfs_rtallocate_extent_near(&args, start, minlen, + maxlen, len, prod, &r); } + xfs_rtbuf_cache_relse(&args); if (error) return error; /* * If it worked, update the superblock. */ - if (r != NULLRTBLOCK) { + if (r != NULLRTEXTNO) { long slen = (long)*len; ASSERT(*len >= minlen && *len <= maxlen); @@ -1250,6 +1244,7 @@ xfs_rtmount_init( struct xfs_buf *bp; /* buffer for last block of subvolume */ struct xfs_sb *sbp; /* filesystem superblock copy in mount */ xfs_daddr_t d; /* address of last block of subvolume */ + unsigned int rsumblocks; int error; sbp = &mp->m_sb; @@ -1261,10 +1256,9 @@ xfs_rtmount_init( return -ENODEV; } mp->m_rsumlevels = sbp->sb_rextslog + 1; - mp->m_rsumsize = - (uint)sizeof(xfs_suminfo_t) * mp->m_rsumlevels * - sbp->sb_rbmblocks; - mp->m_rsumsize = roundup(mp->m_rsumsize, sbp->sb_blocksize); + rsumblocks = xfs_rtsummary_blockcount(mp, mp->m_rsumlevels, + mp->m_sb.sb_rbmblocks); + mp->m_rsumsize = XFS_FSB_TO_B(mp, rsumblocks); mp->m_rbmip = mp->m_rsumip = NULL; /* * Check that the realtime section is an ok size. @@ -1418,27 +1412,27 @@ xfs_rtunmount_inodes( * of rtextents and the fraction. * The fraction sequence is 0, 1/2, 1/4, 3/4, 1/8, ..., 7/8, 1/16, ... */ -int /* error */ +int /* error */ xfs_rtpick_extent( xfs_mount_t *mp, /* file system mount point */ xfs_trans_t *tp, /* transaction pointer */ - xfs_extlen_t len, /* allocation length (rtextents) */ - xfs_rtblock_t *pick) /* result rt extent */ - { - xfs_rtblock_t b; /* result block */ + xfs_rtxlen_t len, /* allocation length (rtextents) */ + xfs_rtxnum_t *pick) /* result rt extent */ +{ + xfs_rtxnum_t b; /* result rtext */ int log2; /* log of sequence number */ uint64_t resid; /* residual after log removed */ uint64_t seq; /* sequence number of file creation */ - struct timespec64 ts; /* temporary timespec64 storage */ + struct timespec64 ts; /* timespec in inode */ ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); + ts = inode_get_atime(VFS_I(mp->m_rbmip)); if (!(mp->m_rbmip->i_diflags & XFS_DIFLAG_NEWRTBM)) { mp->m_rbmip->i_diflags |= XFS_DIFLAG_NEWRTBM; seq = 0; } else { - ts = inode_get_atime(VFS_I(mp->m_rbmip)); - seq = (uint64_t)ts.tv_sec; + seq = ts.tv_sec; } if ((log2 = xfs_highbit64(seq)) == -1) b = 0; @@ -1451,7 +1445,7 @@ xfs_rtpick_extent( if (b + len > mp->m_sb.sb_rextents) b = mp->m_sb.sb_rextents - len; } - ts.tv_sec = (time64_t)seq + 1; + ts.tv_sec = seq + 1; inode_set_atime_to_ts(VFS_I(mp->m_rbmip), ts); xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); *pick = b; diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index 62c7ad79cbb6..f7cb9ffe51ca 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -11,22 +11,6 @@ struct xfs_mount; struct xfs_trans; -/* - * XXX: Most of the realtime allocation functions deal in units of realtime - * extents, not realtime blocks. This looks funny when paired with the type - * name and screams for a larger cleanup. 
- */ -struct xfs_rtalloc_rec { - xfs_rtblock_t ar_startext; - xfs_rtblock_t ar_extcount; -}; - -typedef int (*xfs_rtalloc_query_range_fn)( - struct xfs_mount *mp, - struct xfs_trans *tp, - const struct xfs_rtalloc_rec *rec, - void *priv); - #ifdef CONFIG_XFS_RT /* * Function prototypes for exported functions. @@ -40,23 +24,14 @@ typedef int (*xfs_rtalloc_query_range_fn)( int /* error */ xfs_rtallocate_extent( struct xfs_trans *tp, /* transaction pointer */ - xfs_rtblock_t bno, /* starting block number to allocate */ - xfs_extlen_t minlen, /* minimum length to allocate */ - xfs_extlen_t maxlen, /* maximum length to allocate */ - xfs_extlen_t *len, /* out: actual length allocated */ + xfs_rtxnum_t start, /* starting rtext number to allocate */ + xfs_rtxlen_t minlen, /* minimum length to allocate */ + xfs_rtxlen_t maxlen, /* maximum length to allocate */ + xfs_rtxlen_t *len, /* out: actual length allocated */ int wasdel, /* was a delayed allocation extent */ - xfs_extlen_t prod, /* extent product factor */ - xfs_rtblock_t *rtblock); /* out: start block allocated */ + xfs_rtxlen_t prod, /* extent product factor */ + xfs_rtxnum_t *rtblock); /* out: start rtext allocated */ -/* - * Free an extent in the realtime subvolume. Length is expressed in - * realtime extents, as is the block number. - */ -int /* error */ -xfs_rtfree_extent( - struct xfs_trans *tp, /* transaction pointer */ - xfs_rtblock_t bno, /* starting block number to free */ - xfs_extlen_t len); /* length of extent freed */ /* * Initialize realtime fields in the mount structure. @@ -87,8 +62,8 @@ int /* error */ xfs_rtpick_extent( struct xfs_mount *mp, /* file system mount point */ struct xfs_trans *tp, /* transaction pointer */ - xfs_extlen_t len, /* allocation length (rtextents) */ - xfs_rtblock_t *pick); /* result rt extent */ + xfs_rtxlen_t len, /* allocation length (rtextents) */ + xfs_rtxnum_t *pick); /* result rt extent */ /* * Grow the realtime area of the filesystem. 
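Reached from the XFS_IOC_GROWFSRT ioctl; the new size and extent size in the growfs struct are given in filesystem blocks.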
@@ -98,55 +73,12 @@ xfs_growfs_rt( struct xfs_mount *mp, /* file system mount structure */ xfs_growfs_rt_t *in); /* user supplied growfs struct */ -/* - * From xfs_rtbitmap.c - */ -int xfs_rtbuf_get(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t block, int issum, struct xfs_buf **bpp); -int xfs_rtcheck_range(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, xfs_extlen_t len, int val, - xfs_rtblock_t *new, int *stat); -int xfs_rtfind_back(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, xfs_rtblock_t limit, - xfs_rtblock_t *rtblock); -int xfs_rtfind_forw(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, xfs_rtblock_t limit, - xfs_rtblock_t *rtblock); -int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, xfs_extlen_t len, int val); -int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp, - int log, xfs_rtblock_t bbno, int delta, - struct xfs_buf **rbpp, xfs_fsblock_t *rsb, - xfs_suminfo_t *sum); -int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log, - xfs_rtblock_t bbno, int delta, struct xfs_buf **rbpp, - xfs_fsblock_t *rsb); -int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, xfs_extlen_t len, - struct xfs_buf **rbpp, xfs_fsblock_t *rsb); -int xfs_rtalloc_query_range(struct xfs_mount *mp, struct xfs_trans *tp, - const struct xfs_rtalloc_rec *low_rec, - const struct xfs_rtalloc_rec *high_rec, - xfs_rtalloc_query_range_fn fn, void *priv); -int xfs_rtalloc_query_all(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtalloc_query_range_fn fn, - void *priv); -bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno); -int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, xfs_extlen_t len, - bool *is_free); int xfs_rtalloc_reinit_frextents(struct xfs_mount *mp); #else -# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS) -# define xfs_rtfree_extent(t,b,l) (ENOSYS) -# define xfs_rtpick_extent(m,t,l,rb) (ENOSYS) -# define xfs_growfs_rt(mp,in) (ENOSYS) -# define xfs_rtalloc_query_range(t,l,h,f,p) (ENOSYS) -# define xfs_rtalloc_query_all(m,t,f,p) (ENOSYS) -# define xfs_rtbuf_get(m,t,b,i,p) (ENOSYS) -# define xfs_verify_rtbno(m, r) (false) -# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (ENOSYS) -# define xfs_rtalloc_reinit_frextents(m) (0) +# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (-ENOSYS) +# define xfs_rtpick_extent(m,t,l,rb) (-ENOSYS) +# define xfs_growfs_rt(mp,in) (-ENOSYS) +# define xfs_rtalloc_reinit_frextents(m) (0) static inline int /* error */ xfs_rtmount_init( xfs_mount_t *mp) /* file system mount structure */ @@ -157,7 +89,7 @@ xfs_rtmount_init( xfs_warn(mp, "Not built with CONFIG_XFS_RT"); return -ENOSYS; } -# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) +# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 
0 : (-ENOSYS)) # define xfs_rtunmount_inodes(m) #endif /* CONFIG_XFS_RT */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index f0ae07828153..764304595e8b 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -42,6 +42,7 @@ #include "xfs_xattr.h" #include "xfs_iunlink_item.h" #include "xfs_dahash_test.h" +#include "xfs_rtbitmap.h" #include "scrub/stats.h" #include <linux/magic.h> @@ -896,7 +897,7 @@ xfs_fs_statfs( statp->f_blocks = sbp->sb_rblocks; freertx = percpu_counter_sum_positive(&mp->m_frextents); - statp->f_bavail = statp->f_bfree = freertx * sbp->sb_rextsize; + statp->f_bavail = statp->f_bfree = xfs_rtx_to_rtb(mp, freertx); } return 0; diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 8c0bfc9a33b1..305c9d07bf1b 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -24,6 +24,7 @@ #include "xfs_dquot_item.h" #include "xfs_dquot.h" #include "xfs_icache.h" +#include "xfs_rtbitmap.h" struct kmem_cache *xfs_trans_cache; @@ -655,6 +656,10 @@ xfs_trans_unreserve_and_mod_sb( mp->m_sb.sb_agcount += tp->t_agcount_delta; mp->m_sb.sb_imax_pct += tp->t_imaxpct_delta; mp->m_sb.sb_rextsize += tp->t_rextsize_delta; + if (tp->t_rextsize_delta) { + mp->m_rtxblklog = log2_if_power2(mp->m_sb.sb_rextsize); + mp->m_rtxblkmask = mask64_if_power2(mp->m_sb.sb_rextsize); + } mp->m_sb.sb_rbmblocks += tp->t_rbmblocks_delta; mp->m_sb.sb_rblocks += tp->t_rblocks_delta; mp->m_sb.sb_rextents += tp->t_rextents_delta; @@ -1196,7 +1201,7 @@ xfs_trans_alloc_inode( retry: error = xfs_trans_alloc(mp, resv, dblocks, - rblocks / mp->m_sb.sb_rextsize, + xfs_extlen_to_rtxlen(mp, rblocks), force ? XFS_TRANS_RESERVE : 0, &tp); if (error) return error; diff --git a/include/drm/amd_asic_type.h b/include/drm/amd_asic_type.h index 90b69270f2fa..724c45e3e9a7 100644 --- a/include/drm/amd_asic_type.h +++ b/include/drm/amd_asic_type.h @@ -68,4 +68,9 @@ enum amd_asic_type { extern const char *amdgpu_asic_name[]; +struct amdgpu_asic_type_quirk { + unsigned short device; /* PCI device ID */ + u8 revision; /* revision ID */ + unsigned short type; /* real ASIC type */ +}; #endif /*__AMD_ASIC_TYPE_H__ */ diff --git a/include/dt-bindings/watchdog/aspeed-wdt.h b/include/dt-bindings/watchdog/aspeed-wdt.h new file mode 100644 index 000000000000..7ae6d84b2bd9 --- /dev/null +++ b/include/dt-bindings/watchdog/aspeed-wdt.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef DT_BINDINGS_ASPEED_WDT_H +#define DT_BINDINGS_ASPEED_WDT_H + +#define AST2500_WDT_RESET_CPU (1 << 0) +#define AST2500_WDT_RESET_COPROC (1 << 1) +#define AST2500_WDT_RESET_SDRAM (1 << 2) +#define AST2500_WDT_RESET_AHB (1 << 3) +#define AST2500_WDT_RESET_I2C (1 << 4) +#define AST2500_WDT_RESET_MAC0 (1 << 5) +#define AST2500_WDT_RESET_MAC1 (1 << 6) +#define AST2500_WDT_RESET_GRAPHICS (1 << 7) +#define AST2500_WDT_RESET_USB2_HOST_HUB (1 << 8) +#define AST2500_WDT_RESET_USB_HOST (1 << 9) +#define AST2500_WDT_RESET_HID_EHCI (1 << 10) +#define AST2500_WDT_RESET_VIDEO (1 << 11) +#define AST2500_WDT_RESET_HAC (1 << 12) +#define AST2500_WDT_RESET_LPC (1 << 13) +#define AST2500_WDT_RESET_SDIO (1 << 14) +#define AST2500_WDT_RESET_MIC (1 << 15) +#define AST2500_WDT_RESET_CRT (1 << 16) +#define AST2500_WDT_RESET_PWM (1 << 17) +#define AST2500_WDT_RESET_PECI (1 << 18) +#define AST2500_WDT_RESET_JTAG (1 << 19) +#define AST2500_WDT_RESET_ADC (1 << 20) +#define AST2500_WDT_RESET_GPIO (1 << 21) +#define AST2500_WDT_RESET_MCTP (1 << 22) +#define AST2500_WDT_RESET_XDMA (1 << 23) +#define AST2500_WDT_RESET_SPI (1 << 24) +#define 
AST2500_WDT_RESET_SOC_MISC (1 << 25) + +#define AST2500_WDT_RESET_DEFAULT 0x023ffff3 + +#define AST2600_WDT_RESET1_CPU (1 << 0) +#define AST2600_WDT_RESET1_SDRAM (1 << 1) +#define AST2600_WDT_RESET1_AHB (1 << 2) +#define AST2600_WDT_RESET1_SLI (1 << 3) +#define AST2600_WDT_RESET1_SOC_MISC0 (1 << 4) +#define AST2600_WDT_RESET1_COPROC (1 << 5) +#define AST2600_WDT_RESET1_USB_A (1 << 6) +#define AST2600_WDT_RESET1_USB_B (1 << 7) +#define AST2600_WDT_RESET1_UHCI (1 << 8) +#define AST2600_WDT_RESET1_GRAPHICS (1 << 9) +#define AST2600_WDT_RESET1_CRT (1 << 10) +#define AST2600_WDT_RESET1_VIDEO (1 << 11) +#define AST2600_WDT_RESET1_HAC (1 << 12) +#define AST2600_WDT_RESET1_DP (1 << 13) +#define AST2600_WDT_RESET1_DP_MCU (1 << 14) +#define AST2600_WDT_RESET1_GP_MCU (1 << 15) +#define AST2600_WDT_RESET1_MAC0 (1 << 16) +#define AST2600_WDT_RESET1_MAC1 (1 << 17) +#define AST2600_WDT_RESET1_SDIO0 (1 << 18) +#define AST2600_WDT_RESET1_JTAG0 (1 << 19) +#define AST2600_WDT_RESET1_MCTP0 (1 << 20) +#define AST2600_WDT_RESET1_MCTP1 (1 << 21) +#define AST2600_WDT_RESET1_XDMA0 (1 << 22) +#define AST2600_WDT_RESET1_XDMA1 (1 << 23) +#define AST2600_WDT_RESET1_GPIO0 (1 << 24) +#define AST2600_WDT_RESET1_RVAS (1 << 25) + +#define AST2600_WDT_RESET1_DEFAULT 0x030f1ff1 + +#define AST2600_WDT_RESET2_CPU (1 << 0) +#define AST2600_WDT_RESET2_SPI (1 << 1) +#define AST2600_WDT_RESET2_AHB2 (1 << 2) +#define AST2600_WDT_RESET2_SLI2 (1 << 3) +#define AST2600_WDT_RESET2_SOC_MISC1 (1 << 4) +#define AST2600_WDT_RESET2_MAC2 (1 << 5) +#define AST2600_WDT_RESET2_MAC3 (1 << 6) +#define AST2600_WDT_RESET2_SDIO1 (1 << 7) +#define AST2600_WDT_RESET2_JTAG1 (1 << 8) +#define AST2600_WDT_RESET2_GPIO1 (1 << 9) +#define AST2600_WDT_RESET2_MDIO (1 << 10) +#define AST2600_WDT_RESET2_LPC (1 << 11) +#define AST2600_WDT_RESET2_PECI (1 << 12) +#define AST2600_WDT_RESET2_PWM (1 << 13) +#define AST2600_WDT_RESET2_ADC (1 << 14) +#define AST2600_WDT_RESET2_FSI (1 << 15) +#define AST2600_WDT_RESET2_I2C (1 << 16) +#define AST2600_WDT_RESET2_I3C_GLOBAL (1 << 17) +#define AST2600_WDT_RESET2_I3C0 (1 << 18) +#define AST2600_WDT_RESET2_I3C1 (1 << 19) +#define AST2600_WDT_RESET2_I3C2 (1 << 20) +#define AST2600_WDT_RESET2_I3C3 (1 << 21) +#define AST2600_WDT_RESET2_I3C4 (1 << 22) +#define AST2600_WDT_RESET2_I3C5 (1 << 23) +#define AST2600_WDT_RESET2_ESPI (1 << 26) + +#define AST2600_WDT_RESET2_DEFAULT 0x03fffff1 + +#endif diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 99a5201d9e62..dc7ed2f46886 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -33,126 +33,6 @@ struct pci_dev; extern int amd_iommu_detect(void); -/** - * amd_iommu_init_device() - Init device for use with IOMMUv2 driver - * @pdev: The PCI device to initialize - * @pasids: Number of PASIDs to support for this device - * - * This function does all setup for the device pdev so that it can be - * used with IOMMUv2. - * Returns 0 on success or negative value on error. 
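The IOMMUv2 device API deleted here and in the lines that follow (init_device, the PASID bind/unbind calls, the PPR and invalidate-context callbacks) has its modern counterpart in the generic SVA interface; a hedged sketch of the replacement usage, assuming the device advertises IOMMU_DEV_FEAT_SVA, with error handling trimmed:

/* Sketch: bind current->mm to a device PASID through the generic
 * SVA API that supersedes amd_iommu_bind_pasid(). */
#include <linux/iommu.h>

struct iommu_sva *handle;
u32 pasid;

handle = iommu_sva_bind_device(dev, current->mm);
if (IS_ERR(handle))
	return PTR_ERR(handle);
pasid = iommu_sva_get_pasid(handle);	/* use in device descriptors */
/* ... issue PASID-tagged DMA ... */
iommu_sva_unbind_device(handle);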
- */ -extern int amd_iommu_init_device(struct pci_dev *pdev, int pasids); - -/** - * amd_iommu_free_device() - Free all IOMMUv2 related device resources - * and disable IOMMUv2 usage for this device - * @pdev: The PCI device to disable IOMMUv2 usage for' - */ -extern void amd_iommu_free_device(struct pci_dev *pdev); - -/** - * amd_iommu_bind_pasid() - Bind a given task to a PASID on a device - * @pdev: The PCI device to bind the task to - * @pasid: The PASID on the device the task should be bound to - * @task: the task to bind - * - * The function returns 0 on success or a negative value on error. - */ -extern int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid, - struct task_struct *task); - -/** - * amd_iommu_unbind_pasid() - Unbind a PASID from its task on - * a device - * @pdev: The device of the PASID - * @pasid: The PASID to unbind - * - * When this function returns the device is no longer using the PASID - * and the PASID is no longer bound to its task. - */ -extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid); - -/** - * amd_iommu_set_invalid_ppr_cb() - Register a call-back for failed - * PRI requests - * @pdev: The PCI device the call-back should be registered for - * @cb: The call-back function - * - * The IOMMUv2 driver invokes this call-back when it is unable to - * successfully handle a PRI request. The device driver can then decide - * which PRI response the device should see. Possible return values for - * the call-back are: - * - * - AMD_IOMMU_INV_PRI_RSP_SUCCESS - Send SUCCESS back to the device - * - AMD_IOMMU_INV_PRI_RSP_INVALID - Send INVALID back to the device - * - AMD_IOMMU_INV_PRI_RSP_FAIL - Send Failure back to the device, - * the device is required to disable - * PRI when it receives this response - * - * The function returns 0 on success or negative value on error. - */ -#define AMD_IOMMU_INV_PRI_RSP_SUCCESS 0 -#define AMD_IOMMU_INV_PRI_RSP_INVALID 1 -#define AMD_IOMMU_INV_PRI_RSP_FAIL 2 - -typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev, - u32 pasid, - unsigned long address, - u16); - -extern int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, - amd_iommu_invalid_ppr_cb cb); - -#define PPR_FAULT_EXEC (1 << 1) -#define PPR_FAULT_READ (1 << 2) -#define PPR_FAULT_WRITE (1 << 5) -#define PPR_FAULT_USER (1 << 6) -#define PPR_FAULT_RSVD (1 << 7) -#define PPR_FAULT_GN (1 << 8) - -/** - * amd_iommu_device_info() - Get information about IOMMUv2 support of a - * PCI device - * @pdev: PCI device to query information from - * @info: A pointer to an amd_iommu_device_info structure which will contain - * the information about the PCI device - * - * Returns 0 on success, negative value on error - */ - -#define AMD_IOMMU_DEVICE_FLAG_ATS_SUP 0x1 /* ATS feature supported */ -#define AMD_IOMMU_DEVICE_FLAG_PRI_SUP 0x2 /* PRI feature supported */ -#define AMD_IOMMU_DEVICE_FLAG_PASID_SUP 0x4 /* PASID context supported */ -#define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP 0x8 /* Device may request execution - on memory pages */ -#define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP 0x10 /* Device may request - super-user privileges */ - -struct amd_iommu_device_info { - int max_pasids; - u32 flags; -}; - -extern int amd_iommu_device_info(struct pci_dev *pdev, - struct amd_iommu_device_info *info); - -/** - * amd_iommu_set_invalidate_ctx_cb() - Register a call-back for invalidating - * a pasid context. 
This call-back is - * invoked when the IOMMUv2 driver needs to - * invalidate a PASID context, for example - * because the task that is bound to that - * context is about to exit. - * - * @pdev: The PCI device the call-back should be registered for - * @cb: The call-back function - */ - -typedef void (*amd_iommu_invalidate_ctx)(struct pci_dev *pdev, u32 pasid); - -extern int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, - amd_iommu_invalidate_ctx cb); #else /* CONFIG_AMD_IOMMU */ static inline int amd_iommu_detect(void) { return -ENODEV; } diff --git a/include/linux/btf.h b/include/linux/btf.h index c2231c64d60b..59d404e22814 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -84,6 +84,17 @@ */ #define __bpf_kfunc __used noinline +#define __bpf_kfunc_start_defs() \ + __diag_push(); \ + __diag_ignore_all("-Wmissing-declarations", \ + "Global kfuncs as their definitions will be in BTF");\ + __diag_ignore_all("-Wmissing-prototypes", \ + "Global kfuncs as their definitions will be in BTF") + +#define __bpf_kfunc_end_defs() __diag_pop() +#define __bpf_hook_start() __bpf_kfunc_start_defs() +#define __bpf_hook_end() __bpf_kfunc_end_defs() + /* * Return the name of the passed struct, if exists, or halt the build if for * example the structure gets renamed. In this way, developers have to revisit diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h index d5a5da838caf..11a92a946016 100644 --- a/include/linux/ceph/ceph_debug.h +++ b/include/linux/ceph/ceph_debug.h @@ -19,12 +19,25 @@ pr_debug("%.*s %12.12s:%-4d : " fmt, \ 8 - (int)sizeof(KBUILD_MODNAME), " ", \ kbasename(__FILE__), __LINE__, ##__VA_ARGS__) +# define doutc(client, fmt, ...) \ + pr_debug("%.*s %12.12s:%-4d : [%pU %llu] " fmt, \ + 8 - (int)sizeof(KBUILD_MODNAME), " ", \ + kbasename(__FILE__), __LINE__, \ + &client->fsid, client->monc.auth->global_id, \ + ##__VA_ARGS__) # else /* faux printk call just to see any compiler warnings. */ # define dout(fmt, ...) do { \ if (0) \ printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ } while (0) +# define doutc(client, fmt, ...) do { \ + if (0) \ + printk(KERN_DEBUG "[%pU %llu] " fmt, \ + &client->fsid, \ + client->monc.auth->global_id, \ + ##__VA_ARGS__); \ + } while (0) # endif #else @@ -33,7 +46,32 @@ * or, just wrap pr_debug */ # define dout(fmt, ...) pr_debug(" " fmt, ##__VA_ARGS__) +# define doutc(client, fmt, ...) \ + pr_debug(" [%pU %llu] %s: " fmt, &client->fsid, \ + client->monc.auth->global_id, __func__, ##__VA_ARGS__) #endif +#define pr_notice_client(client, fmt, ...) \ + pr_notice("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_info_client(client, fmt, ...) \ + pr_info("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_warn_client(client, fmt, ...) \ + pr_warn("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_warn_once_client(client, fmt, ...) \ + pr_warn_once("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_err_client(client, fmt, ...) \ + pr_err("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_warn_ratelimited_client(client, fmt, ...) \ + pr_warn_ratelimited("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_err_ratelimited_client(client, fmt, ...) 
\ + pr_err_ratelimited("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) + #endif diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index f3b3593254b9..ee1d0e5f9789 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -357,6 +357,11 @@ enum { CEPH_MDS_OP_RENAMESNAP = 0x01403, }; +#define IS_CEPH_MDS_OP_NEWINODE(op) (op == CEPH_MDS_OP_CREATE || \ + op == CEPH_MDS_OP_MKNOD || \ + op == CEPH_MDS_OP_MKDIR || \ + op == CEPH_MDS_OP_SYMLINK) + extern const char *ceph_mds_op_name(int op); #define CEPH_SETATTR_MODE (1 << 0) @@ -497,7 +502,7 @@ struct ceph_mds_request_head_legacy { union ceph_mds_request_args args; } __attribute__ ((packed)); -#define CEPH_MDS_REQUEST_HEAD_VERSION 2 +#define CEPH_MDS_REQUEST_HEAD_VERSION 3 struct ceph_mds_request_head_old { __le16 version; /* struct version */ @@ -528,6 +533,9 @@ struct ceph_mds_request_head { __le32 ext_num_retry; /* new count retry attempts */ __le32 ext_num_fwd; /* new count fwd attempts */ + + __le32 struct_len; /* to store size of struct ceph_mds_request_head */ + __le32 owner_uid, owner_gid; /* used for OPs which create inodes */ } __attribute__ ((packed)); /* cap/lease release record */ diff --git a/include/linux/closure.h b/include/linux/closure.h index 722a586bb224..de7bb47d8a46 100644 --- a/include/linux/closure.h +++ b/include/linux/closure.h @@ -154,6 +154,7 @@ struct closure { struct closure *parent; atomic_t remaining; + bool closure_get_happened; #ifdef CONFIG_DEBUG_CLOSURES #define CLOSURE_MAGIC_DEAD 0xc054dead @@ -185,7 +186,11 @@ static inline unsigned closure_nr_remaining(struct closure *cl) */ static inline void closure_sync(struct closure *cl) { - if (closure_nr_remaining(cl) != 1) +#ifdef CONFIG_DEBUG_CLOSURES + BUG_ON(closure_nr_remaining(cl) != 1 && !cl->closure_get_happened); +#endif + + if (cl->closure_get_happened) __closure_sync(cl); } @@ -233,8 +238,6 @@ static inline void set_closure_fn(struct closure *cl, closure_fn *fn, closure_set_ip(cl); cl->fn = fn; cl->wq = wq; - /* between atomic_dec() in closure_put() */ - smp_mb__before_atomic(); } static inline void closure_queue(struct closure *cl) @@ -259,6 +262,8 @@ static inline void closure_queue(struct closure *cl) */ static inline void closure_get(struct closure *cl) { + cl->closure_get_happened = true; + #ifdef CONFIG_DEBUG_CLOSURES BUG_ON((atomic_inc_return(&cl->remaining) & CLOSURE_REMAINING_MASK) <= 1); @@ -281,6 +286,7 @@ static inline void closure_init(struct closure *cl, struct closure *parent) closure_get(parent); atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); + cl->closure_get_happened = false; closure_debug_create(cl); closure_set_ip(cl); diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index f0ccca16a0ac..4a658de44ee9 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -144,6 +144,7 @@ bool dma_pci_p2pdma_supported(struct device *dev); int dma_set_mask(struct device *dev, u64 mask); int dma_set_coherent_mask(struct device *dev, u64 mask); u64 dma_get_required_mask(struct device *dev); +bool dma_addressing_limited(struct device *dev); size_t dma_max_mapping_size(struct device *dev); size_t dma_opt_mapping_size(struct device *dev); bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); @@ -264,6 +265,10 @@ static inline u64 dma_get_required_mask(struct device *dev) { return 0; } +static inline bool dma_addressing_limited(struct device *dev) +{ + return false; +} static inline size_t dma_max_mapping_size(struct 
device *dev) { return 0; @@ -465,20 +470,6 @@ static inline int dma_coerce_mask_and_coherent(struct device *dev, u64 mask) return dma_set_mask_and_coherent(dev, mask); } -/** - * dma_addressing_limited - return if the device is addressing limited - * @dev: device to check - * - * Return %true if the devices DMA mask is too small to address all memory in - * the system, else %false. Lack of addressing bits is the prime reason for - * bounce buffering, but might not be the only one. - */ -static inline bool dma_addressing_limited(struct device *dev) -{ - return min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) < - dma_get_required_mask(dev); -} - static inline unsigned int dma_get_max_seg_size(struct device *dev) { if (dev->dma_parms && dev->dma_parms->max_segment_size) diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 226a36ed5aa1..689028257fcc 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1045,10 +1045,10 @@ static inline int ethtool_mm_frag_size_min_to_add(u32 val_min, u32 *val_add, /** * ethtool_sprintf - Write formatted string to ethtool string data - * @data: Pointer to start of string to update + * @data: Pointer to a pointer to the start of string to update * @fmt: Format of string to write * - * Write formatted string to data. Update data to point at start of + * Write formatted string to *data. Update *data to point at start of * next string. */ extern __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...); diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 0388e8c20f52..bb37ad5cc954 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -99,6 +99,17 @@ enum fid_type { FILEID_FAT_WITH_PARENT = 0x72, /* + * 64 bit inode number, 32 bit generation number. + */ + FILEID_INO64_GEN = 0x81, + + /* + * 64 bit inode number, 32 bit generation number, + * 64 bit parent inode number, 32 bit parent generation. + */ + FILEID_INO64_GEN_PARENT = 0x82, + + /* * 128 bit child FID (struct lu_fid) * 128 bit parent FID (struct lu_fid) */ @@ -129,7 +140,11 @@ struct fid { u32 parent_ino; u32 parent_gen; } i32; - struct { + struct { + u64 ino; + u32 gen; + } __packed i64; + struct { u32 block; u16 partref; u16 parent_partref; @@ -253,6 +268,33 @@ extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, int flags); +static inline bool exportfs_can_encode_fid(const struct export_operations *nop) +{ + return !nop || nop->encode_fh; +} + +static inline bool exportfs_can_decode_fh(const struct export_operations *nop) +{ + return nop && nop->fh_to_dentry; +} + +static inline bool exportfs_can_encode_fh(const struct export_operations *nop, + int fh_flags) +{ + /* + * If a non-decodeable file handle was requested, we only need to make + * sure that filesystem did not opt-out of encoding fid. + */ + if (fh_flags & EXPORT_FH_FID) + return exportfs_can_encode_fid(nop); + + /* + * If a decodeable file handle was requested, we need to make sure that + * filesystem can also decode file handles. + */ + return exportfs_can_decode_fh(nop); +} + static inline int exportfs_encode_fid(struct inode *inode, struct fid *fid, int *max_len) { @@ -272,10 +314,12 @@ extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, /* * Generic helpers for filesystems. 
*/ -extern struct dentry *generic_fh_to_dentry(struct super_block *sb, +int generic_encode_ino32_fh(struct inode *inode, __u32 *fh, int *max_len, + struct inode *parent); +struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type, struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen)); -extern struct dentry *generic_fh_to_parent(struct super_block *sb, +struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type, struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen)); diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 1d454dc944b3..0aed62f0c633 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -605,9 +605,6 @@ int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, void *data, struct lock_class_key *lock_key, struct lock_class_key *request_key); -struct gpio_chip *gpiochip_find(void *data, - int (*match)(struct gpio_chip *gc, void *data)); - struct gpio_device *gpio_device_find(void *data, int (*match)(struct gpio_chip *gc, void *data)); struct gpio_device *gpio_device_find_by_label(const char *label); diff --git a/include/linux/idr.h b/include/linux/idr.h index a0dce14090a9..da5f5fa4a3a6 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -200,7 +200,7 @@ static inline void idr_preload_end(void) */ #define idr_for_each_entry_ul(idr, entry, tmp, id) \ for (tmp = 0, id = 0; \ - tmp <= id && ((entry) = idr_get_next_ul(idr, &(id))) != NULL; \ + ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ tmp = id, ++id) /** @@ -224,10 +224,12 @@ static inline void idr_preload_end(void) * @id: Entry ID. * * Continue to iterate over entries, continuing after the current position. + * After normal termination @entry is left with the value NULL. This + * is convenient for a "not found" value. */ #define idr_for_each_entry_continue_ul(idr, entry, tmp, id) \ for (tmp = id; \ - tmp <= id && ((entry) = idr_get_next_ul(idr, &(id))) != NULL; \ + ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ tmp = id, ++id) /* diff --git a/include/linux/input.h b/include/linux/input.h index 49790c1bd2c4..de6503c0edb8 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -562,7 +562,7 @@ struct ff_device { int max_effects; struct ff_effect *effects; - struct file *effect_owners[]; + struct file *effect_owners[] __counted_by(max_effects); }; int input_ff_create(struct input_dev *dev, unsigned int max_effects); diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h index 3b8580bd33c1..2cf89a538b18 100644 --- a/include/linux/input/mt.h +++ b/include/linux/input/mt.h @@ -47,7 +47,7 @@ struct input_mt { unsigned int flags; unsigned int frame; int *red; - struct input_mt_slot slots[]; + struct input_mt_slot slots[] __counted_by(num_slots); }; static inline void input_mt_set_value(struct input_mt_slot *slot, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 8fb1b41b4d15..ec289c1016f5 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -66,6 +66,7 @@ struct iommu_domain_geometry { #define __IOMMU_DOMAIN_DMA_FQ (1U << 3) /* DMA-API uses flush queue */ #define __IOMMU_DOMAIN_SVA (1U << 4) /* Shared process address space */ +#define __IOMMU_DOMAIN_PLATFORM (1U << 5) #define __IOMMU_DOMAIN_NESTED (1U << 6) /* User-managed address space nested on a stage-2 translation */ @@ -86,6 +87,8 @@ struct iommu_domain_geometry { * invalidation. 
* IOMMU_DOMAIN_SVA - DMA addresses are shared process addresses * represented by mm_struct's. + * IOMMU_DOMAIN_PLATFORM - Legacy domain for drivers that do their own + * dma_api stuff. Do not use in new drivers. */ #define IOMMU_DOMAIN_BLOCKED (0U) #define IOMMU_DOMAIN_IDENTITY (__IOMMU_DOMAIN_PT) @@ -96,6 +99,7 @@ struct iommu_domain_geometry { __IOMMU_DOMAIN_DMA_API | \ __IOMMU_DOMAIN_DMA_FQ) #define IOMMU_DOMAIN_SVA (__IOMMU_DOMAIN_SVA) +#define IOMMU_DOMAIN_PLATFORM (__IOMMU_DOMAIN_PLATFORM) #define IOMMU_DOMAIN_NESTED (__IOMMU_DOMAIN_NESTED) struct iommu_domain { @@ -340,13 +344,12 @@ static inline int __iommu_copy_struct_from_user( * NULL while the @user_data can be optionally provided, the * new domain must support __IOMMU_DOMAIN_PAGING. * Upon failure, ERR_PTR must be returned. + * @domain_alloc_paging: Allocate an iommu_domain that can be used for + * UNMANAGED, DMA, and DMA_FQ domain types. * @probe_device: Add device to iommu driver handling * @release_device: Remove device from iommu driver handling * @probe_finalize: Do final setup work after the device is added to an IOMMU * group and attached to the groups domain - * @set_platform_dma_ops: Returning control back to the platform DMA ops. This op - * is to support old IOMMU drivers, new drivers should use - * default domains, and the common IOMMU DMA ops. * @device_group: find iommu group for a particular device * @get_resv_regions: Request list of reserved regions for a device * @of_xlate: add OF master IDs to iommu grouping @@ -365,6 +368,13 @@ static inline int __iommu_copy_struct_from_user( * will be blocked by the hardware. * @pgsize_bitmap: bitmap of all possible supported page sizes * @owner: Driver module providing these ops + * @identity_domain: An always available, always attachable identity + * translation. + * @blocked_domain: An always available, always attachable blocking + * translation. + * @default_domain: If not NULL this will always be set as the default domain. + * This should be an IDENTITY/BLOCKED/PLATFORM domain. + * Do not use in new drivers. */ struct iommu_ops { bool (*capable)(struct device *dev, enum iommu_cap); @@ -375,11 +385,11 @@ struct iommu_ops { struct iommu_domain *(*domain_alloc_user)( struct device *dev, u32 flags, struct iommu_domain *parent, const struct iommu_user_data *user_data); + struct iommu_domain *(*domain_alloc_paging)(struct device *dev); struct iommu_device *(*probe_device)(struct device *dev); void (*release_device)(struct device *dev); void (*probe_finalize)(struct device *dev); - void (*set_platform_dma_ops)(struct device *dev); struct iommu_group *(*device_group)(struct device *dev); /* Request/Free a list of reserved regions for a device */ @@ -402,6 +412,9 @@ struct iommu_ops { const struct iommu_domain_ops *default_domain_ops; unsigned long pgsize_bitmap; struct module *owner; + struct iommu_domain *identity_domain; + struct iommu_domain *blocked_domain; + struct iommu_domain *default_domain; }; /** @@ -420,10 +433,8 @@ struct iommu_ops { * * ENODEV - device specific errors, not able to be attached * * <others> - treated as ENODEV by the caller. Use is discouraged * @set_dev_pasid: set an iommu domain to a pasid of device - * @map: map a physically contiguous memory region to an iommu domain * @map_pages: map a physically contiguous set of pages of the same size to * an iommu domain. 
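Alongside these domain-type additions, the domain ops lose the single-page ->map/->unmap callbacks in favor of ->map_pages/->unmap_pages (see the hunk that follows); a driver that still works page-at-a-time can adapt with a loop like this sketch, where map_one_page() is a hypothetical per-page helper:

/* Sketch of a map_pages implementation; *mapped tells the core how
 * much to unwind if a middle page fails, per the ops contract. */
static int my_map_pages(struct iommu_domain *domain, unsigned long iova,
			phys_addr_t paddr, size_t pgsize, size_t pgcount,
			int prot, gfp_t gfp, size_t *mapped)
{
	size_t i;
	int ret;

	for (i = 0; i < pgcount; i++) {
		ret = map_one_page(domain, iova, paddr, pgsize, prot, gfp);
		if (ret)
			return ret;
		iova += pgsize;
		paddr += pgsize;
		*mapped += pgsize;
	}
	return 0;
}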
- * @unmap: unmap a physically contiguous memory region from an iommu domain * @unmap_pages: unmap a number of pages of the same size from an iommu domain * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain * @iotlb_sync_map: Sync mappings created recently using @map to the hardware @@ -442,20 +453,16 @@ struct iommu_domain_ops { int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); - int (*map)(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp); int (*map_pages)(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped); - size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *iotlb_gather); size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova, size_t pgsize, size_t pgcount, struct iommu_iotlb_gather *iotlb_gather); void (*flush_iotlb_all)(struct iommu_domain *domain); - void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, - size_t size); + int (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, + size_t size); void (*iotlb_sync)(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather); @@ -476,6 +483,7 @@ struct iommu_domain_ops { * @list: Used by the iommu-core to keep a list of registered iommus * @ops: iommu-ops for talking to this iommu * @dev: struct device for sysfs handling + * @singleton_group: Used internally for drivers that have only one group * @max_pasids: number of supported PASIDs */ struct iommu_device { @@ -483,6 +491,7 @@ struct iommu_device { const struct iommu_ops *ops; struct fwnode_handle *fwnode; struct device *dev; + struct iommu_group *singleton_group; u32 max_pasids; }; @@ -526,6 +535,7 @@ struct iommu_fault_param { * @attach_deferred: the dma domain attachment is deferred * @pci_32bit_workaround: Limit DMA allocations to 32-bit IOVAs * @require_direct: device requires IOMMU_RESV_DIRECT regions + * @shadow_on_flush: IOTLB flushes are used to sync shadow tables * * TODO: migrate other per device data pointers under iommu_dev_data, e.g. * struct iommu_group *iommu_group; @@ -541,6 +551,7 @@ struct dev_iommu { u32 attach_deferred:1; u32 pci_32bit_workaround:1; u32 require_direct:1; + u32 shadow_on_flush:1; }; int iommu_device_register(struct iommu_device *iommu, @@ -768,6 +779,7 @@ extern struct iommu_group *pci_device_group(struct device *dev); extern struct iommu_group *generic_device_group(struct device *dev); /* FSL-MC device grouping function */ struct iommu_group *fsl_mc_device_group(struct device *dev); +extern struct iommu_group *generic_single_device_group(struct device *dev); /** * struct iommu_fwspec - per-device IOMMU instance data @@ -1253,7 +1265,7 @@ static inline void iommu_free_global_pasid(ioasid_t pasid) {} * Creates a mapping at @iova for the buffer described by a scatterlist * stored in the given sg_table object in the provided IOMMU domain. 
*/ -static inline size_t iommu_map_sgtable(struct iommu_domain *domain, +static inline ssize_t iommu_map_sgtable(struct iommu_domain *domain, unsigned long iova, struct sg_table *sgt, int prot) { return iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot, diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 365eb092e9c4..ab1da3142b06 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -445,6 +445,10 @@ int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type, int arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value, char *type, char *sym); + +int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); + #else /* !CONFIG_KPROBES: */ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 99b8176c3738..ff217a5ce552 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -48,7 +48,7 @@ LSM_HOOK(int, 0, quota_on, struct dentry *dentry) LSM_HOOK(int, 0, syslog, int type) LSM_HOOK(int, 0, settime, const struct timespec64 *ts, const struct timezone *tz) -LSM_HOOK(int, 0, vm_enough_memory, struct mm_struct *mm, long pages) +LSM_HOOK(int, 1, vm_enough_memory, struct mm_struct *mm, long pages) LSM_HOOK(int, 0, bprm_creds_for_exec, struct linux_binprm *bprm) LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, const struct file *file) LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm) @@ -273,7 +273,7 @@ LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen) LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode) LSM_HOOK(int, 0, inode_notifysecctx, struct inode *inode, void *ctx, u32 ctxlen) LSM_HOOK(int, 0, inode_setsecctx, struct dentry *dentry, void *ctx, u32 ctxlen) -LSM_HOOK(int, 0, inode_getsecctx, struct inode *inode, void **ctx, +LSM_HOOK(int, -EOPNOTSUPP, inode_getsecctx, struct inode *inode, void **ctx, u32 *ctxlen) #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index daa2f40d9ce6..7b12eebc5586 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -295,7 +295,9 @@ struct mmc_card { #define MMC_QUIRK_BROKEN_HPI (1<<13) /* Disable broken HPI support */ #define MMC_QUIRK_BROKEN_SD_DISCARD (1<<14) /* Disable broken SD discard support */ #define MMC_QUIRK_BROKEN_SD_CACHE (1<<15) /* Disable broken SD cache support */ +#define MMC_QUIRK_BROKEN_CACHE_FLUSH (1<<16) /* Don't flush cache until the write has occurred */ + bool written_flag; /* Indicates eMMC has been written since power on */ bool reenable_cmdq; /* Re-enable Command Queue */ unsigned int erase_size; /* erase size in sectors */ diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index 057c89867aa2..b8da2db4ecd2 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -115,6 +115,9 @@ static inline bool vfsgid_eq_kgid(vfsgid_t vfsgid, kgid_t kgid) int vfsgid_in_group_p(vfsgid_t vfsgid); +struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap); +void mnt_idmap_put(struct mnt_idmap *idmap); + vfsuid_t make_vfsuid(struct mnt_idmap *idmap, struct user_namespace *fs_userns, kuid_t kuid); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index cd628c4b011e..cd797e00fe35 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -239,6 +239,7 @@ struct nfs_server { struct list_head 
delegations; struct list_head ss_copies; + unsigned long delegation_gen; unsigned long mig_gen; unsigned long mig_status; #define NFS_MIG_IN_TRANSITION (1) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 12bbb5c63664..539b57fbf3ce 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1772,7 +1772,7 @@ struct nfs_rpc_ops { void (*rename_rpc_prepare)(struct rpc_task *task, struct nfs_renamedata *); int (*rename_done) (struct rpc_task *task, struct inode *old_dir, struct inode *new_dir); int (*link) (struct inode *, struct inode *, const struct qstr *); - int (*symlink) (struct inode *, struct dentry *, struct page *, + int (*symlink) (struct inode *, struct dentry *, struct folio *, unsigned int, struct iattr *); int (*mkdir) (struct inode *, struct dentry *, struct iattr *); int (*rmdir) (struct inode *, const struct qstr *); diff --git a/include/linux/nvme-keyring.h b/include/linux/nvme-keyring.h index 4efea9dd967c..e10333d78dbb 100644 --- a/include/linux/nvme-keyring.h +++ b/include/linux/nvme-keyring.h @@ -6,14 +6,12 @@ #ifndef _NVME_KEYRING_H #define _NVME_KEYRING_H -#ifdef CONFIG_NVME_KEYRING +#if IS_ENABLED(CONFIG_NVME_KEYRING) key_serial_t nvme_tls_psk_default(struct key *keyring, const char *hostnqn, const char *subnqn); key_serial_t nvme_keyring_id(void); -int nvme_keyring_init(void); -void nvme_keyring_exit(void); #else @@ -26,11 +24,5 @@ static inline key_serial_t nvme_keyring_id(void) { return 0; } -static inline int nvme_keyring_init(void) -{ - return 0; -} -static inline void nvme_keyring_exit(void) {} - #endif /* !CONFIG_NVME_KEYRING */ #endif /* _NVME_KEYRING_H */ diff --git a/include/linux/nvme.h b/include/linux/nvme.h index a7ba74babad7..44325c068b6a 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1732,7 +1732,7 @@ struct nvmf_auth_dhchap_success1_data { __u8 rsvd2; __u8 rvalid; __u8 rsvd3[7]; - /* 'hl' bytes of response value if 'rvalid' is set */ + /* 'hl' bytes of response value */ __u8 rval[]; }; diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h index 4729d54e8995..73de70362b98 100644 --- a/include/linux/pinctrl/consumer.h +++ b/include/linux/pinctrl/consumer.h @@ -17,6 +17,7 @@ #include <linux/pinctrl/pinctrl-state.h> struct device; +struct gpio_chip; /* This struct is private to the core and should be regarded as a cookie */ struct pinctrl; @@ -25,27 +26,30 @@ struct pinctrl_state; #ifdef CONFIG_PINCTRL /* External interface to pin control */ -extern bool pinctrl_gpio_can_use_line(unsigned gpio); -extern int pinctrl_gpio_request(unsigned gpio); -extern void pinctrl_gpio_free(unsigned gpio); -extern int pinctrl_gpio_direction_input(unsigned gpio); -extern int pinctrl_gpio_direction_output(unsigned gpio); -extern int pinctrl_gpio_set_config(unsigned gpio, unsigned long config); - -extern struct pinctrl * __must_check pinctrl_get(struct device *dev); -extern void pinctrl_put(struct pinctrl *p); -extern struct pinctrl_state * __must_check pinctrl_lookup_state(struct pinctrl *p, - const char *name); -extern int pinctrl_select_state(struct pinctrl *p, struct pinctrl_state *s); - -extern struct pinctrl * __must_check devm_pinctrl_get(struct device *dev); -extern void devm_pinctrl_put(struct pinctrl *p); -extern int pinctrl_select_default_state(struct device *dev); +bool pinctrl_gpio_can_use_line(struct gpio_chip *gc, unsigned int offset); +int pinctrl_gpio_request(struct gpio_chip *gc, unsigned int offset); +void pinctrl_gpio_free(struct gpio_chip *gc, unsigned int offset); +int 
pinctrl_gpio_direction_input(struct gpio_chip *gc, + unsigned int offset); +int pinctrl_gpio_direction_output(struct gpio_chip *gc, + unsigned int offset); +int pinctrl_gpio_set_config(struct gpio_chip *gc, unsigned int offset, + unsigned long config); + +struct pinctrl * __must_check pinctrl_get(struct device *dev); +void pinctrl_put(struct pinctrl *p); +struct pinctrl_state * __must_check pinctrl_lookup_state(struct pinctrl *p, + const char *name); +int pinctrl_select_state(struct pinctrl *p, struct pinctrl_state *s); + +struct pinctrl * __must_check devm_pinctrl_get(struct device *dev); +void devm_pinctrl_put(struct pinctrl *p); +int pinctrl_select_default_state(struct device *dev); #ifdef CONFIG_PM -extern int pinctrl_pm_select_default_state(struct device *dev); -extern int pinctrl_pm_select_sleep_state(struct device *dev); -extern int pinctrl_pm_select_idle_state(struct device *dev); +int pinctrl_pm_select_default_state(struct device *dev); +int pinctrl_pm_select_sleep_state(struct device *dev); +int pinctrl_pm_select_idle_state(struct device *dev); #else static inline int pinctrl_pm_select_default_state(struct device *dev) { @@ -63,31 +67,38 @@ static inline int pinctrl_pm_select_idle_state(struct device *dev) #else /* !CONFIG_PINCTRL */ -static inline bool pinctrl_gpio_can_use_line(unsigned gpio) +static inline bool +pinctrl_gpio_can_use_line(struct gpio_chip *gc, unsigned int offset) { return true; } -static inline int pinctrl_gpio_request(unsigned gpio) +static inline int +pinctrl_gpio_request(struct gpio_chip *gc, unsigned int offset) { return 0; } -static inline void pinctrl_gpio_free(unsigned gpio) +static inline void +pinctrl_gpio_free(struct gpio_chip *gc, unsigned int offset) { } -static inline int pinctrl_gpio_direction_input(unsigned gpio) +static inline int +pinctrl_gpio_direction_input(struct gpio_chip *gc, unsigned int offset) { return 0; } -static inline int pinctrl_gpio_direction_output(unsigned gpio) +static inline int +pinctrl_gpio_direction_output(struct gpio_chip *gc, unsigned int offset) { return 0; } -static inline int pinctrl_gpio_set_config(unsigned gpio, unsigned long config) +static inline int +pinctrl_gpio_set_config(struct gpio_chip *gc, unsigned int offset, + unsigned long config) { return 0; } diff --git a/include/linux/platform_data/shmob_drm.h b/include/linux/platform_data/shmob_drm.h index d661399b217d..6c19d4fbbe39 100644 --- a/include/linux/platform_data/shmob_drm.h +++ b/include/linux/platform_data/shmob_drm.h @@ -10,7 +10,7 @@ #ifndef __SHMOB_DRM_H__ #define __SHMOB_DRM_H__ -#include <drm/drm_mode.h> +#include <video/videomode.h> enum shmob_drm_clk_source { SHMOB_DRM_CLK_BUS, @@ -18,72 +18,21 @@ enum shmob_drm_clk_source { SHMOB_DRM_CLK_EXTERNAL, }; -enum shmob_drm_interface { - SHMOB_DRM_IFACE_RGB8, /* 24bpp, 8:8:8 */ - SHMOB_DRM_IFACE_RGB9, /* 18bpp, 9:9 */ - SHMOB_DRM_IFACE_RGB12A, /* 24bpp, 12:12 */ - SHMOB_DRM_IFACE_RGB12B, /* 12bpp */ - SHMOB_DRM_IFACE_RGB16, /* 16bpp */ - SHMOB_DRM_IFACE_RGB18, /* 18bpp */ - SHMOB_DRM_IFACE_RGB24, /* 24bpp */ - SHMOB_DRM_IFACE_YUV422, /* 16bpp */ - SHMOB_DRM_IFACE_SYS8A, /* 24bpp, 8:8:8 */ - SHMOB_DRM_IFACE_SYS8B, /* 18bpp, 8:8:2 */ - SHMOB_DRM_IFACE_SYS8C, /* 18bpp, 2:8:8 */ - SHMOB_DRM_IFACE_SYS8D, /* 16bpp, 8:8 */ - SHMOB_DRM_IFACE_SYS9, /* 18bpp, 9:9 */ - SHMOB_DRM_IFACE_SYS12, /* 24bpp, 12:12 */ - SHMOB_DRM_IFACE_SYS16A, /* 16bpp */ - SHMOB_DRM_IFACE_SYS16B, /* 18bpp, 16:2 */ - SHMOB_DRM_IFACE_SYS16C, /* 18bpp, 2:16 */ - SHMOB_DRM_IFACE_SYS18, /* 18bpp */ - SHMOB_DRM_IFACE_SYS24, /* 24bpp */ -}; - 
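The pinctrl consumer calls above switch from numbers in the deprecated global GPIO numberspace to a (gpio_chip, chip-relative offset) pair; a sketch of what a converted GPIO driver hook looks like (the my_* names are illustrative):

/* Sketch: request/free hooks after the API change; previously these
 * would have passed gc->base + offset as a global GPIO number. */
static int my_gpio_request(struct gpio_chip *gc, unsigned int offset)
{
	return pinctrl_gpio_request(gc, offset);
}

static void my_gpio_free(struct gpio_chip *gc, unsigned int offset)
{
	pinctrl_gpio_free(gc, offset);
}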
-struct shmob_drm_backlight_data { - const char *name; - int max_brightness; - int (*get_brightness)(void); - int (*set_brightness)(int brightness); -}; - struct shmob_drm_panel_data { unsigned int width_mm; /* Panel width in mm */ unsigned int height_mm; /* Panel height in mm */ - struct drm_mode_modeinfo mode; + struct videomode mode; }; -struct shmob_drm_sys_interface_data { - unsigned int read_latch:6; - unsigned int read_setup:8; - unsigned int read_cycle:8; - unsigned int read_strobe:8; - unsigned int write_setup:8; - unsigned int write_cycle:8; - unsigned int write_strobe:8; - unsigned int cs_setup:3; - unsigned int vsync_active_high:1; - unsigned int vsync_dir_input:1; -}; - -#define SHMOB_DRM_IFACE_FL_DWPOL (1 << 0) /* Rising edge dot clock data latch */ -#define SHMOB_DRM_IFACE_FL_DIPOL (1 << 1) /* Active low display enable */ -#define SHMOB_DRM_IFACE_FL_DAPOL (1 << 2) /* Active low display data */ -#define SHMOB_DRM_IFACE_FL_HSCNT (1 << 3) /* Disable HSYNC during VBLANK */ -#define SHMOB_DRM_IFACE_FL_DWCNT (1 << 4) /* Disable dotclock during blanking */ - struct shmob_drm_interface_data { - enum shmob_drm_interface interface; - struct shmob_drm_sys_interface_data sys; + unsigned int bus_fmt; /* MEDIA_BUS_FMT_* */ unsigned int clk_div; - unsigned int flags; }; struct shmob_drm_platform_data { enum shmob_drm_clk_source clk_source; struct shmob_drm_interface_data iface; struct shmob_drm_panel_data panel; - struct shmob_drm_backlight_data backlight; }; #endif /* __SHMOB_DRM_H__ */ diff --git a/include/linux/pwm.h b/include/linux/pwm.h index d2f9f690a9c1..cda3597b84f2 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -41,8 +41,8 @@ struct pwm_args { }; enum { - PWMF_REQUESTED = 1 << 0, - PWMF_EXPORTED = 1 << 1, + PWMF_REQUESTED = 0, + PWMF_EXPORTED = 1, }; /* @@ -71,7 +71,6 @@ struct pwm_state { * @hwpwm: per-chip relative index of the PWM device * @pwm: global index of the PWM device * @chip: PWM chip providing this PWM device - * @chip_data: chip-private data associated with the PWM device * @args: PWM arguments * @state: last applied state * @last: last implemented state (for PWM_DEBUG) @@ -82,7 +81,6 @@ struct pwm_device { unsigned int hwpwm; unsigned int pwm; struct pwm_chip *chip; - void *chip_data; struct pwm_args args; struct pwm_state state; @@ -267,7 +265,6 @@ struct pwm_capture { * @get_state: get the current PWM state. This function is only * called once per PWM device when the PWM chip is * registered. 
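With drm_mode_modeinfo replaced by struct videomode in the shmob-drm platform data just above, board code describes the panel with generic display timings; a hedged example of the new shape, every number below being a placeholder:

/* Sketch: panel timings as a struct videomode instead of a DRM mode. */
#include <video/videomode.h>

static const struct shmob_drm_panel_data panel = {
	.width_mm  = 44,
	.height_mm = 79,
	.mode = {
		.pixelclock   = 5000000,
		.hactive      = 240,
		.hfront_porch = 20,
		.hback_porch  = 20,
		.hsync_len    = 10,
		.vactive      = 320,
		.vfront_porch = 2,
		.vback_porch  = 2,
		.vsync_len    = 2,
	},
};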
- * @owner: helps prevent removal of modules exporting active PWMs */ struct pwm_ops { int (*request)(struct pwm_chip *chip, struct pwm_device *pwm); @@ -278,13 +275,13 @@ struct pwm_ops { const struct pwm_state *state); int (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state); - struct module *owner; }; /** * struct pwm_chip - abstract a PWM controller * @dev: device providing the PWMs * @ops: callbacks for this PWM controller + * @owner: module providing this chip * @base: number of first PWM controlled by this chip * @npwm: number of PWMs controlled by this chip * @of_xlate: request a PWM device given a device tree PWM specifier @@ -295,6 +292,7 @@ struct pwm_ops { struct pwm_chip { struct device *dev; const struct pwm_ops *ops; + struct module *owner; int base; unsigned int npwm; @@ -383,13 +381,13 @@ static inline void pwm_disable(struct pwm_device *pwm) /* PWM provider APIs */ int pwm_capture(struct pwm_device *pwm, struct pwm_capture *result, unsigned long timeout); -int pwm_set_chip_data(struct pwm_device *pwm, void *data); -void *pwm_get_chip_data(struct pwm_device *pwm); -int pwmchip_add(struct pwm_chip *chip); +int __pwmchip_add(struct pwm_chip *chip, struct module *owner); +#define pwmchip_add(chip) __pwmchip_add(chip, THIS_MODULE) void pwmchip_remove(struct pwm_chip *chip); -int devm_pwmchip_add(struct device *dev, struct pwm_chip *chip); +int __devm_pwmchip_add(struct device *dev, struct pwm_chip *chip, struct module *owner); +#define devm_pwmchip_add(dev, chip) __devm_pwmchip_add(dev, chip, THIS_MODULE) struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip, unsigned int index, @@ -445,16 +443,6 @@ static inline int pwm_capture(struct pwm_device *pwm, return -EINVAL; } -static inline int pwm_set_chip_data(struct pwm_device *pwm, void *data) -{ - return -EINVAL; -} - -static inline void *pwm_get_chip_data(struct pwm_device *pwm) -{ - return NULL; -} - static inline int pwmchip_add(struct pwm_chip *chip) { return -EINVAL; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 86825c88b576..255a0562aea5 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -566,6 +566,7 @@ struct spi_controller { #define SPI_CONTROLLER_MUST_RX BIT(3) /* Requires rx */ #define SPI_CONTROLLER_MUST_TX BIT(4) /* Requires tx */ #define SPI_CONTROLLER_GPIO_SS BIT(5) /* GPIO CS must select slave */ +#define SPI_CONTROLLER_SUSPENDED BIT(6) /* Currently suspended */ /* Flag indicating if the allocation of this struct is devres-managed */ bool devm_allocated; diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index af7358277f1c..e9d4377d03c6 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -92,6 +92,7 @@ struct rpc_clnt { }; const struct cred *cl_cred; unsigned int cl_max_connect; /* max number of transports not to the same IP */ + struct super_block *pipefs_sb; }; /* diff --git a/include/linux/tcp.h b/include/linux/tcp.h index ec4e9367f5b0..68f3d315d2e1 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -152,7 +152,7 @@ struct tcp_request_sock { u64 snt_synack; /* first SYNACK sent time */ bool tfo_listener; bool is_mptcp; - s8 req_usec_ts; + bool req_usec_ts; #if IS_ENABLED(CONFIG_MPTCP) bool drop_req; #endif diff --git a/include/linux/vlynq.h b/include/linux/vlynq.h deleted file mode 100644 index e9c0cd36c48a..000000000000 --- a/include/linux/vlynq.h +++ /dev/null @@ -1,149 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2006, 2007 Eugene 
Konev <ejka@openwrt.org> - */ - -#ifndef __VLYNQ_H__ -#define __VLYNQ_H__ - -#include <linux/device.h> -#include <linux/types.h> - -struct module; - -#define VLYNQ_NUM_IRQS 32 - -struct vlynq_mapping { - u32 size; - u32 offset; -}; - -enum vlynq_divisor { - vlynq_div_auto = 0, - vlynq_ldiv1, - vlynq_ldiv2, - vlynq_ldiv3, - vlynq_ldiv4, - vlynq_ldiv5, - vlynq_ldiv6, - vlynq_ldiv7, - vlynq_ldiv8, - vlynq_rdiv1, - vlynq_rdiv2, - vlynq_rdiv3, - vlynq_rdiv4, - vlynq_rdiv5, - vlynq_rdiv6, - vlynq_rdiv7, - vlynq_rdiv8, - vlynq_div_external -}; - -struct vlynq_device_id { - u32 id; - enum vlynq_divisor divisor; - unsigned long driver_data; -}; - -struct vlynq_regs; -struct vlynq_device { - u32 id, dev_id; - int local_irq; - int remote_irq; - enum vlynq_divisor divisor; - u32 regs_start, regs_end; - u32 mem_start, mem_end; - u32 irq_start, irq_end; - int irq; - int enabled; - struct vlynq_regs *local; - struct vlynq_regs *remote; - struct device dev; -}; - -struct vlynq_driver { - char *name; - struct vlynq_device_id *id_table; - int (*probe)(struct vlynq_device *dev, struct vlynq_device_id *id); - void (*remove)(struct vlynq_device *dev); - struct device_driver driver; -}; - -struct plat_vlynq_ops { - int (*on)(struct vlynq_device *dev); - void (*off)(struct vlynq_device *dev); -}; - -static inline struct vlynq_driver *to_vlynq_driver(struct device_driver *drv) -{ - return container_of(drv, struct vlynq_driver, driver); -} - -static inline struct vlynq_device *to_vlynq_device(struct device *device) -{ - return container_of(device, struct vlynq_device, dev); -} - -extern struct bus_type vlynq_bus_type; - -extern int __vlynq_register_driver(struct vlynq_driver *driver, - struct module *owner); - -static inline int vlynq_register_driver(struct vlynq_driver *driver) -{ - return __vlynq_register_driver(driver, THIS_MODULE); -} - -static inline void *vlynq_get_drvdata(struct vlynq_device *dev) -{ - return dev_get_drvdata(&dev->dev); -} - -static inline void vlynq_set_drvdata(struct vlynq_device *dev, void *data) -{ - dev_set_drvdata(&dev->dev, data); -} - -static inline u32 vlynq_mem_start(struct vlynq_device *dev) -{ - return dev->mem_start; -} - -static inline u32 vlynq_mem_end(struct vlynq_device *dev) -{ - return dev->mem_end; -} - -static inline u32 vlynq_mem_len(struct vlynq_device *dev) -{ - return dev->mem_end - dev->mem_start + 1; -} - -static inline int vlynq_virq_to_irq(struct vlynq_device *dev, int virq) -{ - int irq = dev->irq_start + virq; - if ((irq < dev->irq_start) || (irq > dev->irq_end)) - return -EINVAL; - - return irq; -} - -static inline int vlynq_irq_to_virq(struct vlynq_device *dev, int irq) -{ - if ((irq < dev->irq_start) || (irq > dev->irq_end)) - return -EINVAL; - - return irq - dev->irq_start; -} - -extern void vlynq_unregister_driver(struct vlynq_driver *driver); -extern int vlynq_enable_device(struct vlynq_device *dev); -extern void vlynq_disable_device(struct vlynq_device *dev); -extern int vlynq_set_local_mapping(struct vlynq_device *dev, u32 tx_offset, - struct vlynq_mapping *mapping); -extern int vlynq_set_remote_mapping(struct vlynq_device *dev, u32 tx_offset, - struct vlynq_mapping *mapping); -extern int vlynq_set_local_irq(struct vlynq_device *dev, int virq); -extern int vlynq_set_remote_irq(struct vlynq_device *dev, int virq); - -#endif /* __VLYNQ_H__ */ diff --git a/include/net/flow.h b/include/net/flow.h index 7f0adda3bf2f..335bbc52171c 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -40,8 +40,8 @@ struct flowi_common { #define FLOWI_FLAG_KNOWN_NH 0x02 
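Back in the PWM hunk a little further up, the module reference moves from pwm_ops.owner to the chip, recorded implicitly because pwmchip_add() is now a macro expanding to __pwmchip_add(chip, THIS_MODULE); drivers drop the .owner initializer and change nothing else. A sketch, with my_* names invented:

/* Sketch: registration after the owner move; THIS_MODULE is captured
 * by the devm_pwmchip_add() wrapper macro, not by the ops table. */
static const struct pwm_ops my_pwm_ops = {
	.apply = my_pwm_apply,	/* no .owner = THIS_MODULE line anymore */
};

static int my_pwm_probe(struct platform_device *pdev)
{
	struct pwm_chip *chip;

	chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL);
	if (!chip)
		return -ENOMEM;
	chip->dev = &pdev->dev;
	chip->ops = &my_pwm_ops;
	chip->npwm = 1;
	return devm_pwmchip_add(&pdev->dev, chip);
}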
__u32 flowic_secid; kuid_t flowic_uid; - struct flowi_tunnel flowic_tun_key; __u32 flowic_multipath_hash; + struct flowi_tunnel flowic_tun_key; }; union flowi_uli { diff --git a/include/net/netfilter/nf_conntrack_act_ct.h b/include/net/netfilter/nf_conntrack_act_ct.h index 078d3c52c03f..e5f2f0b73a9a 100644 --- a/include/net/netfilter/nf_conntrack_act_ct.h +++ b/include/net/netfilter/nf_conntrack_act_ct.h @@ -20,7 +20,22 @@ static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_find(const struct nf #endif } -static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct nf_conn *ct) +static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ +#if IS_ENABLED(CONFIG_NET_ACT_CT) + struct nf_conn_act_ct_ext *act_ct_ext; + + act_ct_ext = nf_conn_act_ct_ext_find(ct); + if (dev_net(skb->dev) == &init_net && act_ct_ext) + act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex; +#endif +} + +static inline struct +nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) { #if IS_ENABLED(CONFIG_NET_ACT_CT) struct nf_conn_act_ct_ext *act_ct = nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT); @@ -29,22 +44,11 @@ static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct nf_conn * return act_ct; act_ct = nf_ct_ext_add(ct, NF_CT_EXT_ACT_CT, GFP_ATOMIC); + nf_conn_act_ct_ext_fill(skb, ct, ctinfo); return act_ct; #else return NULL; #endif } -static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo) -{ -#if IS_ENABLED(CONFIG_NET_ACT_CT) - struct nf_conn_act_ct_ext *act_ct_ext; - - act_ct_ext = nf_conn_act_ct_ext_find(ct); - if (dev_net(skb->dev) == &init_net && act_ct_ext) - act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex; -#endif -} - #endif /* _NF_CONNTRACK_ACT_CT_H */ diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index a375a171ef3c..b56be10838f0 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -124,7 +124,7 @@ struct tcp_ao_info { #define tcp_hash_fail(msg, family, skb, fmt, ...) 
\ do { \ const struct tcphdr *th = tcp_hdr(skb); \ - char hdr_flags[5] = {}; \ + char hdr_flags[6]; \ char *f = hdr_flags; \ \ if (th->fin) \ @@ -133,17 +133,18 @@ do { \ *f++ = 'S'; \ if (th->rst) \ *f++ = 'R'; \ + if (th->psh) \ + *f++ = 'P'; \ if (th->ack) \ - *f++ = 'A'; \ - if (f != hdr_flags) \ - *f = ' '; \ + *f++ = '.'; \ + *f = 0; \ if ((family) == AF_INET) { \ - net_info_ratelimited("%s for (%pI4, %d)->(%pI4, %d) %s" fmt "\n", \ + net_info_ratelimited("%s for %pI4.%d->%pI4.%d [%s] " fmt "\n", \ msg, &ip_hdr(skb)->saddr, ntohs(th->source), \ &ip_hdr(skb)->daddr, ntohs(th->dest), \ hdr_flags, ##__VA_ARGS__); \ } else { \ - net_info_ratelimited("%s for [%pI6c]:%u->[%pI6c]:%u %s" fmt "\n", \ + net_info_ratelimited("%s for [%pI6c].%d->[%pI6c].%d [%s]" fmt "\n", \ msg, &ipv6_hdr(skb)->saddr, ntohs(th->source), \ &ipv6_hdr(skb)->daddr, ntohs(th->dest), \ hdr_flags, ##__VA_ARGS__); \ diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h index a5ef84944a06..71ae37d3bedd 100644 --- a/include/soc/tegra/mc.h +++ b/include/soc/tegra/mc.h @@ -96,7 +96,6 @@ struct tegra_smmu_soc { struct tegra_mc; struct tegra_smmu; -struct gart_device; #ifdef CONFIG_TEGRA_IOMMU_SMMU struct tegra_smmu *tegra_smmu_probe(struct device *dev, @@ -116,28 +115,6 @@ static inline void tegra_smmu_remove(struct tegra_smmu *smmu) } #endif -#ifdef CONFIG_TEGRA_IOMMU_GART -struct gart_device *tegra_gart_probe(struct device *dev, struct tegra_mc *mc); -int tegra_gart_suspend(struct gart_device *gart); -int tegra_gart_resume(struct gart_device *gart); -#else -static inline struct gart_device * -tegra_gart_probe(struct device *dev, struct tegra_mc *mc) -{ - return ERR_PTR(-ENODEV); -} - -static inline int tegra_gart_suspend(struct gart_device *gart) -{ - return -ENODEV; -} - -static inline int tegra_gart_resume(struct gart_device *gart) -{ - return -ENODEV; -} -#endif - struct tegra_mc_reset { const char *name; unsigned long id; @@ -185,8 +162,6 @@ struct tegra_mc_ops { */ int (*probe)(struct tegra_mc *mc); void (*remove)(struct tegra_mc *mc); - int (*suspend)(struct tegra_mc *mc); - int (*resume)(struct tegra_mc *mc); irqreturn_t (*handle_irq)(int irq, void *data); int (*probe_device)(struct tegra_mc *mc, struct device *dev); }; @@ -225,7 +200,6 @@ struct tegra_mc { struct tegra_bpmp *bpmp; struct device *dev; struct tegra_smmu *smmu; - struct gart_device *gart; void __iomem *regs; void __iomem *bcast_ch_regs; void __iomem **ch_regs; diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h index a03c543cb072..f05f747e444d 100644 --- a/include/uapi/linux/media-bus-format.h +++ b/include/uapi/linux/media-bus-format.h @@ -34,7 +34,7 @@ #define MEDIA_BUS_FMT_FIXED 0x0001 -/* RGB - next is 0x1025 */ +/* RGB - next is 0x1026 */ #define MEDIA_BUS_FMT_RGB444_1X12 0x1016 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE 0x1001 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_LE 0x1002 @@ -46,6 +46,7 @@ #define MEDIA_BUS_FMT_RGB565_2X8_BE 0x1007 #define MEDIA_BUS_FMT_RGB565_2X8_LE 0x1008 #define MEDIA_BUS_FMT_RGB666_1X18 0x1009 +#define MEDIA_BUS_FMT_RGB666_2X9_BE 0x1025 #define MEDIA_BUS_FMT_BGR666_1X18 0x1023 #define MEDIA_BUS_FMT_RBG888_1X24 0x100e #define MEDIA_BUS_FMT_RGB666_1X24_CPADHI 0x1015 diff --git a/include/uapi/linux/nfsd_netlink.h b/include/uapi/linux/nfsd_netlink.h index c8ae72466ee6..3cd044edee5d 100644 --- a/include/uapi/linux/nfsd_netlink.h +++ b/include/uapi/linux/nfsd_netlink.h @@ -3,8 +3,8 @@ /* Documentation/netlink/specs/nfsd.yaml */ /* YNL-GEN uapi header */ -#ifndef _UAPI_LINUX_NFSD_H 
-#define _UAPI_LINUX_NFSD_H +#ifndef _UAPI_LINUX_NFSD_NETLINK_H +#define _UAPI_LINUX_NFSD_NETLINK_H #define NFSD_FAMILY_NAME "nfsd" #define NFSD_FAMILY_VERSION 1 @@ -36,4 +36,4 @@ enum { NFSD_CMD_MAX = (__NFSD_CMD_MAX - 1) }; -#endif /* _UAPI_LINUX_NFSD_H */ +#endif /* _UAPI_LINUX_NFSD_NETLINK_H */ diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index fea06810b43d..a1e4239c7d75 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -52,7 +52,7 @@ static int io_buffer_add_list(struct io_ring_ctx *ctx, return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL)); } -void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) +bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) { struct io_ring_ctx *ctx = req->ctx; struct io_buffer_list *bl; @@ -65,7 +65,7 @@ void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) * multiple use. */ if (req->flags & REQ_F_PARTIAL_IO) - return; + return false; io_ring_submit_lock(ctx, issue_flags); @@ -76,7 +76,7 @@ void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) req->buf_index = buf->bgid; io_ring_submit_unlock(ctx, issue_flags); - return; + return true; } unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags) diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index d14345ef61fc..f2d615236b2c 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -53,11 +53,11 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg); unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags); -void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags); +bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags); void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid); -static inline void io_kbuf_recycle_ring(struct io_kiocb *req) +static inline bool io_kbuf_recycle_ring(struct io_kiocb *req) { /* * We don't need to recycle for REQ_F_BUFFER_RING, we can just clear @@ -80,8 +80,10 @@ static inline void io_kbuf_recycle_ring(struct io_kiocb *req) } else { req->buf_index = req->buf_list->bgid; req->flags &= ~REQ_F_BUFFER_RING; + return true; } } + return false; } static inline bool io_do_buffer_select(struct io_kiocb *req) @@ -91,12 +93,13 @@ static inline bool io_do_buffer_select(struct io_kiocb *req) return !(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)); } -static inline void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags) +static inline bool io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags) { if (req->flags & REQ_F_BUFFER_SELECTED) - io_kbuf_recycle_legacy(req, issue_flags); + return io_kbuf_recycle_legacy(req, issue_flags); if (req->flags & REQ_F_BUFFER_RING) - io_kbuf_recycle_ring(req); + return io_kbuf_recycle_ring(req); + return false; } static inline unsigned int __io_put_kbuf_list(struct io_kiocb *req, diff --git a/io_uring/net.c b/io_uring/net.c index 7a8e298af81b..75d494dad7e2 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1461,16 +1461,6 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags) int ret; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; - if (connect->in_progress) { - struct socket *socket; - - ret = -ENOTSOCK; - socket = sock_from_file(req->file); - if (socket) - ret = sock_error(socket->sk); - goto out; - } - if (req_has_async_data(req)) { io = req->async_data; } else { @@ -1490,9 +1480,7 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags) && force_nonblock) { if (ret == -EINPROGRESS) { connect->in_progress = true; - return -EAGAIN; - 
} - if (ret == -ECONNABORTED) { + } else if (ret == -ECONNABORTED) { if (connect->seen_econnaborted) goto out; connect->seen_econnaborted = true; @@ -1506,6 +1494,16 @@ memcpy(req->async_data, &__io, sizeof(__io)); return -EAGAIN; } + if (connect->in_progress) { + /* + * At least bluetooth will return -EBADFD on a re-connect + * attempt, and it's (supposedly) also valid to get -EISCONN + * which means the previous result is good. For both of these, + * grab the sock_error() and use that for the completion. + */ + if (ret == -EBADFD || ret == -EISCONN) + ret = sock_error(sock_from_file(req->file)->sk); + } if (ret == -ERESTARTSYS) ret = -EINTR; out: diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 25a3515a177c..799db44283c7 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -66,7 +66,7 @@ const struct io_issue_def io_issue_defs[] = { .iopoll = 1, .iopoll_queue = 1, .vectored = 1, - .prep = io_prep_rw, + .prep = io_prep_rwv, .issue = io_read, }, [IORING_OP_WRITEV] = { @@ -80,7 +80,7 @@ const struct io_issue_def io_issue_defs[] = { .iopoll = 1, .iopoll_queue = 1, .vectored = 1, - .prep = io_prep_rw, + .prep = io_prep_rwv, .issue = io_write, }, [IORING_OP_FSYNC] = { @@ -98,7 +98,7 @@ const struct io_issue_def io_issue_defs[] = { .ioprio = 1, .iopoll = 1, .iopoll_queue = 1, - .prep = io_prep_rw, + .prep = io_prep_rw_fixed, .issue = io_read, }, [IORING_OP_WRITE_FIXED] = { @@ -111,7 +111,7 @@ const struct io_issue_def io_issue_defs[] = { .ioprio = 1, .iopoll = 1, .iopoll_queue = 1, - .prep = io_prep_rw, + .prep = io_prep_rw_fixed, .issue = io_write, }, [IORING_OP_POLL_ADD] = { diff --git a/io_uring/rw.c b/io_uring/rw.c index 3398e1d944c2..64390d4e20c1 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -83,18 +83,6 @@ int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) /* used for fixed read/write too - just read unconditionally */ req->buf_index = READ_ONCE(sqe->buf_index); - if (req->opcode == IORING_OP_READ_FIXED || - req->opcode == IORING_OP_WRITE_FIXED) { - struct io_ring_ctx *ctx = req->ctx; - u16 index; - - if (unlikely(req->buf_index >= ctx->nr_user_bufs)) - return -EFAULT; - index = array_index_nospec(req->buf_index, ctx->nr_user_bufs); - req->imu = ctx->user_bufs[index]; - io_req_set_rsrc_node(req, ctx, 0); - } - ioprio = READ_ONCE(sqe->ioprio); if (ioprio) { ret = ioprio_check_cap(ioprio); @@ -110,16 +98,42 @@ int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) rw->addr = READ_ONCE(sqe->addr); rw->len = READ_ONCE(sqe->len); rw->flags = READ_ONCE(sqe->rw_flags); + return 0; +} + +int io_prep_rwv(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + int ret; + + ret = io_prep_rw(req, sqe); + if (unlikely(ret)) + return ret; - /* Have to do this validation here, as this is in io_read() rw->len might * have chanaged due to buffer selection + /* + * Have to do this validation here, as this is in io_read() rw->len + * might have changed due to buffer selection */ - if (req->opcode == IORING_OP_READV && req->flags & REQ_F_BUFFER_SELECT) { - ret = io_iov_buffer_select_prep(req); - if (ret) - return ret; - } + if (req->flags & REQ_F_BUFFER_SELECT) + return io_iov_buffer_select_prep(req); + + return 0; +} +int io_prep_rw_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_ring_ctx *ctx = req->ctx; + u16 index; + int ret; + + ret = io_prep_rw(req, sqe); + if (unlikely(ret)) + return ret; + + if (unlikely(req->buf_index >= ctx->nr_user_bufs)) + return -EFAULT; + 
index = array_index_nospec(req->buf_index, ctx->nr_user_bufs); + req->imu = ctx->user_bufs[index]; + io_req_set_rsrc_node(req, ctx, 0); return 0; } @@ -129,12 +143,20 @@ int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) */ int io_read_mshot_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { + struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); int ret; + /* must be used with provided buffers */ + if (!(req->flags & REQ_F_BUFFER_SELECT)) + return -EINVAL; + ret = io_prep_rw(req, sqe); if (unlikely(ret)) return ret; + if (rw->addr || rw->len) + return -EINVAL; + req->flags |= REQ_F_APOLL_MULTISHOT; return 0; } @@ -542,6 +564,9 @@ static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec, { if (!force && !io_cold_defs[req->opcode].prep_async) return 0; + /* opcode type doesn't need async data */ + if (!io_cold_defs[req->opcode].async_size) + return 0; if (!req_has_async_data(req)) { struct io_async_rw *iorw; @@ -887,6 +912,7 @@ int io_read(struct io_kiocb *req, unsigned int issue_flags) int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) { + struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); unsigned int cflags = 0; int ret; @@ -903,7 +929,12 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) * handling arm it. */ if (ret == -EAGAIN) { - io_kbuf_recycle(req, issue_flags); + /* + * Reset rw->len to 0 again to avoid clamping future mshot + * reads, in case the buffer size varies. + */ + if (io_kbuf_recycle(req, issue_flags)) + rw->len = 0; return -EAGAIN; } @@ -916,6 +947,7 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) * jump to the termination path. This request is then done. */ cflags = io_put_kbuf(req, issue_flags); + rw->len = 0; /* similarly to above, reset len to 0 */ if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER, diff --git a/io_uring/rw.h b/io_uring/rw.h index c5aed03d42a4..f9e89b4fe4da 100644 --- a/io_uring/rw.h +++ b/io_uring/rw.h @@ -16,6 +16,8 @@ struct io_async_rw { }; int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe); +int io_prep_rwv(struct io_kiocb *req, const struct io_uring_sqe *sqe); +int io_prep_rw_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_read(struct io_kiocb *req, unsigned int issue_flags); int io_readv_prep_async(struct io_kiocb *req); int io_write(struct io_kiocb *req, unsigned int issue_flags); diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 833faa04461b..0fae79164187 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -782,9 +782,7 @@ struct bpf_iter_num_kern { int end; /* final value, exclusive */ } __aligned(8); -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) { @@ -843,4 +841,4 @@ __bpf_kfunc void bpf_iter_num_destroy(struct bpf_iter_num *it) s->cur = s->end = 0; } -__diag_pop(); +__bpf_kfunc_end_defs(); diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c index 209e5135f9fb..f04a468cf6a7 100644 --- a/kernel/bpf/cgroup_iter.c +++ b/kernel/bpf/cgroup_iter.c @@ -282,7 +282,7 @@ static struct bpf_iter_reg bpf_cgroup_reg_info = { .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__cgroup, cgroup), - PTR_TO_BTF_ID_OR_NULL }, + PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED }, }, .seq_info = &cgroup_iter_seq_info, }; @@ -305,9 +305,7 @@ struct bpf_iter_css_kern { 
unsigned int flags; } __attribute__((aligned(8))); -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc int bpf_iter_css_new(struct bpf_iter_css *it, struct cgroup_subsys_state *start, unsigned int flags) @@ -358,4 +356,4 @@ __bpf_kfunc void bpf_iter_css_destroy(struct bpf_iter_css *it) { } -__diag_pop();
\ No newline at end of file +__bpf_kfunc_end_defs(); diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c index 6983af8e093c..e01c741e54e7 100644 --- a/kernel/bpf/cpumask.c +++ b/kernel/bpf/cpumask.c @@ -34,9 +34,7 @@ static bool cpu_valid(u32 cpu) return cpu < nr_cpu_ids; } -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global kfuncs as their definitions will be in BTF"); +__bpf_kfunc_start_defs(); /** * bpf_cpumask_create() - Create a mutable BPF cpumask. @@ -407,7 +405,7 @@ __bpf_kfunc u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, return cpumask_any_and_distribute(src1, src2); } -__diag_pop(); +__bpf_kfunc_end_defs(); BTF_SET8_START(cpumask_kfunc_btf_ids) BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index e46ac288a108..56b0c1f678ee 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1177,13 +1177,6 @@ BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map ret = -EBUSY; goto out; } - if (!atomic64_read(&map->usercnt)) { - /* maps with timers must be either held by user space - * or pinned in bpffs. - */ - ret = -EPERM; - goto out; - } /* allocate hrtimer via map_kmalloc to use memcg accounting */ t = bpf_map_kmalloc_node(map, sizeof(*t), GFP_ATOMIC, map->numa_node); if (!t) { @@ -1196,7 +1189,21 @@ BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map rcu_assign_pointer(t->callback_fn, NULL); hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT); t->timer.function = bpf_timer_cb; - timer->timer = t; + WRITE_ONCE(timer->timer, t); + /* Guarantee the order between timer->timer and map->usercnt. So + * when there are concurrent uref release and bpf timer init, either + * bpf_timer_cancel_and_free() called by uref release reads a non-NULL + * timer or atomic64_read() below returns a zero usercnt. + */ + smp_mb(); + if (!atomic64_read(&map->usercnt)) { + /* maps with timers must be either held by user space + * or pinned in bpffs. + */ + WRITE_ONCE(timer->timer, NULL); + kfree(t); + ret = -EPERM; + } out: __bpf_spin_unlock_irqrestore(&timer->lock); return ret; @@ -1374,7 +1381,7 @@ void bpf_timer_cancel_and_free(void *val) /* The subsequent bpf_timer_start/cancel() helpers won't be able to use * this timer, since it won't be initialized. 
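The smp_mb() added to bpf_timer_init() above is one half of a store-buffering (Dekker-style) pairing: each side stores to its own location, then reads the other side's, and the barriers forbid the outcome where both loads miss. A minimal user-space sketch of that pairing, using C11 atomics and hypothetical stand-in names rather than the kernel primitives:

#include <stdatomic.h>
#include <stddef.h>

static _Atomic(void *) timer_ptr;   /* stands in for timer->timer */
static atomic_long usercnt = 1;     /* stands in for map->usercnt */

/* bpf_timer_init() side: publish the timer, then re-check usercnt. */
static int init_side(void *t)
{
        atomic_store_explicit(&timer_ptr, t, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);    /* the smp_mb() */
        if (atomic_load_explicit(&usercnt, memory_order_relaxed) == 0) {
                /* Lost the race: unpublish and fail, as with -EPERM. */
                atomic_store_explicit(&timer_ptr, NULL, memory_order_relaxed);
                return -1;
        }
        return 0;
}

/* uref-release side: drop usercnt, then look for a published timer. */
static void *release_side(void)
{
        atomic_store_explicit(&usercnt, 0, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);    /* pairs with above */
        return atomic_load_explicit(&timer_ptr, memory_order_relaxed);
}

With both fences in place, at least one of the two loads observes the other side's store: either init_side() sees the zero usercnt and frees the timer itself, or release_side() sees a non-NULL timer and can free it.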
*/ - timer->timer = NULL; + WRITE_ONCE(timer->timer, NULL); out: __bpf_spin_unlock_irqrestore(&timer->lock); if (!t) @@ -1886,9 +1893,7 @@ void bpf_rb_root_free(const struct btf_field *field, void *rb_root, } } -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) { @@ -2505,7 +2510,7 @@ __bpf_kfunc void bpf_throw(u64 cookie) WARN(1, "A call to BPF exception callback should never return\n"); } -__diag_pop(); +__bpf_kfunc_end_defs(); BTF_SET8_START(generic_btf_ids) #ifdef CONFIG_KEXEC_CORE @@ -2564,15 +2569,17 @@ BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_iter_task_vma_new, KF_ITER_NEW | KF_RCU) BTF_ID_FLAGS(func, bpf_iter_task_vma_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_task_vma_destroy, KF_ITER_DESTROY) +#ifdef CONFIG_CGROUPS BTF_ID_FLAGS(func, bpf_iter_css_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_iter_css_task_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_css_task_destroy, KF_ITER_DESTROY) -BTF_ID_FLAGS(func, bpf_iter_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS | KF_RCU_PROTECTED) -BTF_ID_FLAGS(func, bpf_iter_task_next, KF_ITER_NEXT | KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_iter_task_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_iter_css_new, KF_ITER_NEW | KF_TRUSTED_ARGS | KF_RCU_PROTECTED) BTF_ID_FLAGS(func, bpf_iter_css_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_css_destroy, KF_ITER_DESTROY) +#endif +BTF_ID_FLAGS(func, bpf_iter_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS | KF_RCU_PROTECTED) +BTF_ID_FLAGS(func, bpf_iter_task_next, KF_ITER_NEXT | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_iter_task_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_dynptr_adjust) BTF_ID_FLAGS(func, bpf_dynptr_is_null) BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly) diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c index 6fc9dae9edc8..6abd7c5df4b3 100644 --- a/kernel/bpf/map_iter.c +++ b/kernel/bpf/map_iter.c @@ -193,9 +193,7 @@ static int __init bpf_map_iter_init(void) late_initcall(bpf_map_iter_init); -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc s64 bpf_map_sum_elem_count(const struct bpf_map *map) { @@ -213,7 +211,7 @@ __bpf_kfunc s64 bpf_map_sum_elem_count(const struct bpf_map *map) return ret; } -__diag_pop(); +__bpf_kfunc_end_defs(); BTF_SET8_START(bpf_map_iter_kfunc_ids) BTF_ID_FLAGS(func, bpf_map_sum_elem_count, KF_TRUSTED_ARGS) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 654601dd6b49..26082b97894d 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -704,7 +704,7 @@ static struct bpf_iter_reg task_reg_info = { .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__task, task), - PTR_TO_BTF_ID_OR_NULL }, + PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED }, }, .seq_info = &task_seq_info, .fill_link_info = bpf_iter_fill_link_info, @@ -822,9 +822,7 @@ struct bpf_iter_task_vma_kern { struct bpf_iter_task_vma_kern_data *data; } __attribute__((aligned(8))); -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it, struct task_struct *task, u64 addr) @@ -890,7 +888,9 @@ __bpf_kfunc void 
bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) } } -__diag_pop(); +__bpf_kfunc_end_defs(); + +#ifdef CONFIG_CGROUPS struct bpf_iter_css_task { __u64 __opaque[1]; @@ -900,9 +900,7 @@ struct bpf_iter_css_task_kern { struct css_task_iter *css_it; } __attribute__((aligned(8))); -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc int bpf_iter_css_task_new(struct bpf_iter_css_task *it, struct cgroup_subsys_state *css, unsigned int flags) @@ -948,7 +946,9 @@ __bpf_kfunc void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it) bpf_mem_free(&bpf_global_ma, kit->css_it); } -__diag_pop(); +__bpf_kfunc_end_defs(); + +#endif /* CONFIG_CGROUPS */ struct bpf_iter_task { __u64 __opaque[3]; @@ -969,9 +969,7 @@ enum { BPF_TASK_ITER_PROC_THREADS }; -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc int bpf_iter_task_new(struct bpf_iter_task *it, struct task_struct *task__nullable, unsigned int flags) @@ -1041,7 +1039,7 @@ __bpf_kfunc void bpf_iter_task_destroy(struct bpf_iter_task *it) { } -__diag_pop(); +__bpf_kfunc_end_defs(); DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 857d76694517..bd1c42eb540f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3742,7 +3742,12 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, if (class == BPF_ALU || class == BPF_ALU64) { if (!bt_is_reg_set(bt, dreg)) return 0; - if (opcode == BPF_MOV) { + if (opcode == BPF_END || opcode == BPF_NEG) { + /* sreg is reserved and unused + * dreg still need precision before this insn + */ + return 0; + } else if (opcode == BPF_MOV) { if (BPF_SRC(insn->code) == BPF_X) { /* dreg = sreg or dreg = (s8, s16, s32)sreg * dreg needs precision after this insn @@ -4674,7 +4679,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, insn->imm != 0 && env->bpf_capable) { struct bpf_reg_state fake_reg = {}; - __mark_reg_known(&fake_reg, (u32)insn->imm); + __mark_reg_known(&fake_reg, insn->imm); fake_reg.type = SCALAR_VALUE; save_register_state(state, spi, &fake_reg, size); } else if (reg && is_spillable_regtype(reg->type)) { @@ -5388,7 +5393,9 @@ static bool in_rcu_cs(struct bpf_verifier_env *env) /* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */ BTF_SET_START(rcu_protected_types) BTF_ID(struct, prog_test_ref_kfunc) +#ifdef CONFIG_CGROUPS BTF_ID(struct, cgroup) +#endif BTF_ID(struct, bpf_cpumask) BTF_ID(struct, task_struct) BTF_SET_END(rcu_protected_types) @@ -10835,7 +10842,9 @@ BTF_ID(func, bpf_dynptr_clone) BTF_ID(func, bpf_percpu_obj_new_impl) BTF_ID(func, bpf_percpu_obj_drop_impl) BTF_ID(func, bpf_throw) +#ifdef CONFIG_CGROUPS BTF_ID(func, bpf_iter_css_task_new) +#endif BTF_SET_END(special_kfunc_set) BTF_ID_LIST(special_kfunc_list) @@ -10861,7 +10870,11 @@ BTF_ID(func, bpf_dynptr_clone) BTF_ID(func, bpf_percpu_obj_new_impl) BTF_ID(func, bpf_percpu_obj_drop_impl) BTF_ID(func, bpf_throw) +#ifdef CONFIG_CGROUPS BTF_ID(func, bpf_iter_css_task_new) +#else +BTF_ID_UNUSED +#endif static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) { @@ -11394,6 +11407,12 @@ static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env, &meta->arg_rbtree_root.field); } +/* + * css_task iter allowlist is needed to avoid 
deadlocking on css_set_lock. + * LSM hooks and iters (both sleepable and non-sleepable) are safe. + * Any sleepable progs are also safe since bpf_check_attach_target() enforces + * that they can only be attached to some specific hook points. + */ static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env) { enum bpf_prog_type prog_type = resolve_prog_type(env->prog); @@ -11401,10 +11420,12 @@ static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env) switch (prog_type) { case BPF_PROG_TYPE_LSM: return true; - case BPF_TRACE_ITER: - return env->prog->aux->sleepable; + case BPF_PROG_TYPE_TRACING: + if (env->prog->expected_attach_type == BPF_TRACE_ITER) + return true; + fallthrough; default: - return false; + return env->prog->aux->sleepable; } } @@ -11663,7 +11684,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_ITER: if (meta->func_id == special_kfunc_list[KF_bpf_iter_css_task_new]) { if (!check_css_task_iter_allowlist(env)) { - verbose(env, "css_task_iter is only allowed in bpf_lsm and bpf iter-s\n"); + verbose(env, "css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs\n"); return -EINVAL; } } diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index d80d7a608141..c0adb7254b45 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -156,19 +156,16 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos, * optimize away the callsite. Therefore, __weak is needed to ensure that the * call is still emitted, by telling the compiler that we don't know what the * function might eventually be. - * - * __diag_* below are needed to dismiss the missing prototype warning. */ -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "kfuncs which will be used in BPF programs"); + +__bpf_hook_start(); __weak noinline void bpf_rstat_flush(struct cgroup *cgrp, struct cgroup *parent, int cpu) { } -__diag_pop(); +__bpf_hook_end(); /* see cgroup_rstat_flush() */ static void cgroup_rstat_flush_locked(struct cgroup *cgrp) diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 621037a0aa87..ce1bb2301c06 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -1006,6 +1006,9 @@ void kgdb_panic(const char *msg) if (panic_timeout) return; + debug_locks_off(); + console_flush_on_panic(CONSOLE_FLUSH_PENDING); + if (dbg_kdb_mode) kdb_printf("PANIC: %s\n", msg); diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 438b868cbfa9..6b213c8252d6 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -272,11 +272,10 @@ char *kdbgetenv(const char *match) * kdballocenv - This function is used to allocate bytes for * environment entries. * Parameters: - * match A character string representing a numeric value - * Outputs: - * *value the unsigned long representation of the env variable 'match' + * bytes The number of bytes to allocate in the static buffer. * Returns: - * Zero on success, a kdb diagnostic on failure. + * A pointer to the allocated space in the buffer on success. + * NULL if bytes > size available in the envbuffer. * Remarks: * We use a static environment buffer (envbuffer) to hold the values * of dynamically generated environment variables (see kdb_set). 
Buffer diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index ed3056eb20b8..73c95815789a 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -587,6 +587,46 @@ int dma_direct_supported(struct device *dev, u64 mask) return mask >= phys_to_dma_unencrypted(dev, min_mask); } +/* + * Check whether all RAM resource ranges are covered by the DMA range map. + * Returns 0 when further checking is needed. + * Returns 1 if some RAM range can't be covered by dma_range_map. + */ +static int check_ram_in_range_map(unsigned long start_pfn, + unsigned long nr_pages, void *data) +{ + unsigned long end_pfn = start_pfn + nr_pages; + const struct bus_dma_region *bdr = NULL; + const struct bus_dma_region *m; + struct device *dev = data; + + while (start_pfn < end_pfn) { + for (m = dev->dma_range_map; PFN_DOWN(m->size); m++) { + unsigned long cpu_start_pfn = PFN_DOWN(m->cpu_start); + + if (start_pfn >= cpu_start_pfn && + start_pfn - cpu_start_pfn < PFN_DOWN(m->size)) { + bdr = m; + break; + } + } + if (!bdr) + return 1; + + start_pfn = PFN_DOWN(bdr->cpu_start) + PFN_DOWN(bdr->size); + } + + return 0; +} + +bool dma_direct_all_ram_mapped(struct device *dev) +{ + if (!dev->dma_range_map) + return true; + return !walk_system_ram_range(0, PFN_DOWN(ULONG_MAX) + 1, dev, + check_ram_in_range_map); +} + size_t dma_direct_max_mapping_size(struct device *dev) { /* If SWIOTLB is active, use its maximum mapping size */ diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h index 97ec892ea0b5..18d346118fe8 100644 --- a/kernel/dma/direct.h +++ b/kernel/dma/direct.h @@ -20,6 +20,7 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr); int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs); +bool dma_direct_all_ram_mapped(struct device *dev); size_t dma_direct_max_mapping_size(struct device *dev); #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index e323ca48f7f2..58db8fd70471 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -793,6 +793,28 @@ int dma_set_coherent_mask(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_set_coherent_mask); +/** + * dma_addressing_limited - return if the device is addressing limited + * @dev: device to check + * + * Return %true if the device's DMA mask is too small to address all memory in + * the system, else %false. Lack of addressing bits is the prime reason for + * bounce buffering, but might not be the only one. 
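dma_addressing_limited(), documented above and defined in the hunk that follows, now also reports holes in dev->dma_range_map via dma_direct_all_ram_mapped(). A sketch of how a driver might consume it, assuming a hypothetical foo driver (the dma_* helpers are the real API; the capping policy is purely illustrative):

#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/sizes.h>

static int foo_setup_dma(struct device *dev)
{
        int ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));

        if (ret)
                return ret;

        if (dma_addressing_limited(dev)) {
                /* Either the usable mask is smaller than the required
                 * mask, or dma_range_map leaves some RAM unreachable:
                 * bouncing is likely, so keep segments small to limit
                 * bounce-buffer pressure.
                 */
                dev_info(dev, "DMA addressing limited, capping segment size\n");
                dma_set_max_seg_size(dev, SZ_64K);
        }
        return 0;
}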
+ */ +bool dma_addressing_limited(struct device *dev) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) < + dma_get_required_mask(dev)) + return true; + + if (unlikely(ops)) + return false; + return !dma_direct_all_ram_mapped(dev); +} +EXPORT_SYMBOL_GPL(dma_addressing_limited); + size_t dma_max_mapping_size(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 26202274784f..33d942615be5 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -283,7 +283,8 @@ static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start, } for (i = 0; i < mem->nslabs; i++) { - mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i); + mem->slots[i].list = min(IO_TLB_SEGSIZE - io_tlb_offset(i), + mem->nslabs - i); mem->slots[i].orig_addr = INVALID_PHYS_ADDR; mem->slots[i].alloc_size = 0; } @@ -558,29 +559,40 @@ void __init swiotlb_exit(void) * alloc_dma_pages() - allocate pages to be used for DMA * @gfp: GFP flags for the allocation. * @bytes: Size of the buffer. + * @phys_limit: Maximum allowed physical address of the buffer. * * Allocate pages from the buddy allocator. If successful, make the allocated * pages decrypted that they can be used for DMA. * - * Return: Decrypted pages, or %NULL on failure. + * Return: Decrypted pages, %NULL on allocation failure, or ERR_PTR(-EAGAIN) + * if the allocated physical address was above @phys_limit. */ -static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes) +static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit) { unsigned int order = get_order(bytes); struct page *page; + phys_addr_t paddr; void *vaddr; page = alloc_pages(gfp, order); if (!page) return NULL; - vaddr = page_address(page); + paddr = page_to_phys(page); + if (paddr + bytes - 1 > phys_limit) { + __free_pages(page, order); + return ERR_PTR(-EAGAIN); + } + + vaddr = phys_to_virt(paddr); if (set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes))) goto error; return page; error: - __free_pages(page, order); + /* Intentional leak if pages cannot be encrypted again. */ + if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes))) + __free_pages(page, order); return NULL; } @@ -618,11 +630,7 @@ static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes, else if (phys_limit <= DMA_BIT_MASK(32)) gfp |= __GFP_DMA32; - while ((page = alloc_dma_pages(gfp, bytes)) && - page_to_phys(page) + bytes - 1 > phys_limit) { - /* allocated, but too high */ - __free_pages(page, get_order(bytes)); - + while (IS_ERR(page = alloc_dma_pages(gfp, bytes, phys_limit))) { if (IS_ENABLED(CONFIG_ZONE_DMA32) && phys_limit < DMA_BIT_MASK(64) && !(gfp & (__GFP_DMA32 | __GFP_DMA))) diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 0d866eaa4cc8..b531c33e9545 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -500,6 +500,7 @@ static inline void rcu_expedite_gp(void) { } static inline void rcu_unexpedite_gp(void) { } static inline void rcu_async_hurry(void) { } static inline void rcu_async_relax(void) { } +static inline bool rcu_cpu_online(int cpu) { return true; } #else /* #ifdef CONFIG_TINY_RCU */ bool rcu_gp_is_normal(void); /* Internal RCU use. */ bool rcu_gp_is_expedited(void); /* Internal RCU use. 
*/ @@ -509,6 +510,7 @@ void rcu_unexpedite_gp(void); void rcu_async_hurry(void); void rcu_async_relax(void); void rcupdate_announce_bootup_oddness(void); +bool rcu_cpu_online(int cpu); #ifdef CONFIG_TASKS_RCU_GENERIC void show_rcu_tasks_gp_kthreads(void); #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */ diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 1fa631168594..f54d5782eca0 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -895,10 +895,36 @@ static void rcu_tasks_pregp_step(struct list_head *hop) synchronize_rcu(); } +/* Check for quiescent states since the pregp's synchronize_rcu() */ +static bool rcu_tasks_is_holdout(struct task_struct *t) +{ + int cpu; + + /* Has the task been seen voluntarily sleeping? */ + if (!READ_ONCE(t->on_rq)) + return false; + + /* + * Idle tasks (or idle injection) within the idle loop are RCU-tasks + * quiescent states. But CPU boot code performed by the idle task + * isn't a quiescent state. + */ + if (is_idle_task(t)) + return false; + + cpu = task_cpu(t); + + /* Idle tasks on offline CPUs are RCU-tasks quiescent states. */ + if (t == idle_task(cpu) && !rcu_cpu_online(cpu)) + return false; + + return true; +} + /* Per-task initial processing. */ static void rcu_tasks_pertask(struct task_struct *t, struct list_head *hop) { - if (t != current && READ_ONCE(t->on_rq) && !is_idle_task(t)) { + if (t != current && rcu_tasks_is_holdout(t)) { get_task_struct(t); t->rcu_tasks_nvcsw = READ_ONCE(t->nvcsw); WRITE_ONCE(t->rcu_tasks_holdout, true); @@ -947,7 +973,7 @@ static void check_holdout_task(struct task_struct *t, if (!READ_ONCE(t->rcu_tasks_holdout) || t->rcu_tasks_nvcsw != READ_ONCE(t->nvcsw) || - !READ_ONCE(t->on_rq) || + !rcu_tasks_is_holdout(t) || (IS_ENABLED(CONFIG_NO_HZ_FULL) && !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) { WRITE_ONCE(t->rcu_tasks_holdout, false); @@ -1525,7 +1551,7 @@ static int trc_inspect_reader(struct task_struct *t, void *bhp_in) } else { // The task is not running, so C-language access is safe. nesting = t->trc_reader_nesting; - WARN_ON_ONCE(ofl && task_curr(t) && !is_idle_task(t)); + WARN_ON_ONCE(ofl && task_curr(t) && (t != idle_task(task_cpu(t)))); if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) && ofl) n_heavy_reader_ofl_updates++; } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index d3a97e129020..3ac3c846105f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -755,14 +755,19 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) } /* - * Return true if the specified CPU has passed through a quiescent - * state by virtue of being in or having passed through an dynticks - * idle state since the last call to dyntick_save_progress_counter() - * for this same CPU, or by virtue of having been offline. + * Returns positive if the specified CPU has passed through a quiescent state + * by virtue of being in or having passed through a dynticks idle state since + * the last call to dyntick_save_progress_counter() for this same CPU, or by + * virtue of having been offline. + * + * Returns negative if the specified CPU needs a force resched. + * + * Returns zero otherwise. 
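This tri-state return value lets force_qs_rnp() in the next hunk separate detection from side effects: CPUs that need a resched are collected into a local rsmask while the rcu_node lock is held, and resched_cpu(), which acquires the target CPU's runqueue lock, runs only after that lock is dropped. A standalone sketch of the collect-then-act shape, with a pthread mutex standing in for the kernel's raw spinlock (hypothetical names throughout):

#include <pthread.h>

static pthread_mutex_t node_lock = PTHREAD_MUTEX_INITIALIZER;

/* f() returns >0 for "quiescent state seen", <0 for "needs resched". */
static void scan_node(unsigned long scanmask, int (*f)(int cpu),
                      void (*resched)(int cpu))
{
        unsigned long qsmask = 0, rsmask = 0;
        int cpu;

        pthread_mutex_lock(&node_lock);
        for (cpu = 0; cpu < 64; cpu++) {
                int ret;

                if (!(scanmask & (1UL << cpu)))
                        continue;
                ret = f(cpu);
                if (ret > 0)
                        qsmask |= 1UL << cpu;  /* quiescent state to report */
                if (ret < 0)
                        rsmask |= 1UL << cpu;  /* resched deferred to later */
        }
        (void)qsmask;  /* the real code reports this up the rcu_node tree */
        pthread_mutex_unlock(&node_lock);

        /* Lock-ordering-sensitive side effects only after unlocking. */
        for (cpu = 0; cpu < 64; cpu++)
                if (rsmask & (1UL << cpu))
                        resched(cpu);
}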
*/ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) { unsigned long jtsq; + int ret = 0; struct rcu_node *rnp = rdp->mynode; /* @@ -848,8 +853,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) (time_after(jiffies, READ_ONCE(rdp->last_fqs_resched) + jtsq * 3) || rcu_state.cbovld)) { WRITE_ONCE(rdp->rcu_urgent_qs, true); - resched_cpu(rdp->cpu); WRITE_ONCE(rdp->last_fqs_resched, jiffies); + ret = -1; } /* @@ -862,8 +867,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) if (time_after(jiffies, rcu_state.jiffies_resched)) { if (time_after(jiffies, READ_ONCE(rdp->last_fqs_resched) + jtsq)) { - resched_cpu(rdp->cpu); WRITE_ONCE(rdp->last_fqs_resched, jiffies); + ret = -1; } if (IS_ENABLED(CONFIG_IRQ_WORK) && !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq && @@ -892,7 +897,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) } } - return 0; + return ret; } /* Trace-event wrapper function for trace_rcu_future_grace_period. */ @@ -2271,15 +2276,15 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp)) { int cpu; unsigned long flags; - unsigned long mask; - struct rcu_data *rdp; struct rcu_node *rnp; rcu_state.cbovld = rcu_state.cbovldnext; rcu_state.cbovldnext = false; rcu_for_each_leaf_node(rnp) { + unsigned long mask = 0; + unsigned long rsmask = 0; + cond_resched_tasks_rcu_qs(); - mask = 0; raw_spin_lock_irqsave_rcu_node(rnp, flags); rcu_state.cbovldnext |= !!rnp->cbovldmask; if (rnp->qsmask == 0) { @@ -2297,11 +2302,17 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp)) continue; } for_each_leaf_node_cpu_mask(rnp, cpu, rnp->qsmask) { + struct rcu_data *rdp; + int ret; + rdp = per_cpu_ptr(&rcu_data, cpu); - if (f(rdp)) { + ret = f(rdp); + if (ret > 0) { mask |= rdp->grpmask; rcu_disable_urgency_upon_qs(rdp); } + if (ret < 0) + rsmask |= rdp->grpmask; } if (mask != 0) { /* Idle/offline CPUs, report (releases rnp->lock). */ @@ -2310,6 +2321,9 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp)) /* Nothing to do here, so just drop the lock. 
*/ raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } + + for_each_leaf_node_cpu_mask(rnp, cpu, rsmask) + resched_cpu(cpu); } } @@ -4195,6 +4209,13 @@ static bool rcu_rdp_cpu_online(struct rcu_data *rdp) return !!(rdp->grpmask & rcu_rnp_online_cpus(rdp->mynode)); } +bool rcu_cpu_online(int cpu) +{ + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + + return rcu_rdp_cpu_online(rdp); +} + #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) /* diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index df697c74d519..84e8a0f6e4e0 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1252,9 +1252,7 @@ static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = { }; #ifdef CONFIG_KEYS -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "kfuncs which will be used in BPF programs"); +__bpf_kfunc_start_defs(); /** * bpf_lookup_user_key - lookup a key by its serial @@ -1404,7 +1402,7 @@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, } #endif /* CONFIG_SYSTEM_DATA_VERIFICATION */ -__diag_pop(); +__bpf_kfunc_end_defs(); BTF_SET8_START(key_sig_kfunc_set) BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE) diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index 8bfe23af9c73..7d2ddbcfa377 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -927,11 +927,12 @@ static int parse_symbol_and_return(int argc, const char *argv[], for (i = 2; i < argc; i++) { tmp = strstr(argv[i], "$retval"); if (tmp && !isalnum(tmp[7]) && tmp[7] != '_') { + if (is_tracepoint) { + trace_probe_log_set_index(i); + trace_probe_log_err(tmp - argv[i], RETVAL_ON_PROBE); + return -EINVAL; + } *is_return = true; - /* - * NOTE: Don't check is_tracepoint here, because it will - * be checked when the argument is parsed. - */ break; } } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index a3442db35670..52f8b537dd0a 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1020,9 +1020,9 @@ EXPORT_SYMBOL_GPL(kprobe_event_cmd_init); /** * __kprobe_event_gen_cmd_start - Generate a kprobe event command from arg list * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @kretprobe: Is this a return probe? * @name: The name of the kprobe event * @loc: The location of the kprobe event - * @kretprobe: Is this a return probe? 
* @...: Variable number of arg (pairs), one pair for each field * * NOTE: Users normally won't want to call this function directly, but diff --git a/lib/closure.c b/lib/closure.c index 0855e698ced1..f86c9eeafb35 100644 --- a/lib/closure.c +++ b/lib/closure.c @@ -21,6 +21,10 @@ static inline void closure_put_after_sub(struct closure *cl, int flags) BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR)); if (!r) { + smp_acquire__after_ctrl_dep(); + + cl->closure_get_happened = false; + if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) { atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); @@ -43,7 +47,7 @@ static inline void closure_put_after_sub(struct closure *cl, int flags) /* For clearing flags with the same atomic op as a put */ void closure_sub(struct closure *cl, int v) { - closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining)); + closure_put_after_sub(cl, atomic_sub_return_release(v, &cl->remaining)); } EXPORT_SYMBOL(closure_sub); @@ -52,7 +56,7 @@ EXPORT_SYMBOL(closure_sub); */ void closure_put(struct closure *cl) { - closure_put_after_sub(cl, atomic_dec_return(&cl->remaining)); + closure_put_after_sub(cl, atomic_dec_return_release(&cl->remaining)); } EXPORT_SYMBOL(closure_put); @@ -90,6 +94,7 @@ bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl) if (atomic_read(&cl->remaining) & CLOSURE_WAITING) return false; + cl->closure_get_happened = true; closure_set_waiting(cl, _RET_IP_); atomic_add(CLOSURE_WAITING + 1, &cl->remaining); llist_add(&cl->list, &waitlist->list); diff --git a/lib/test_objpool.c b/lib/test_objpool.c index a94078402138..bfdb81599832 100644 --- a/lib/test_objpool.c +++ b/lib/test_objpool.c @@ -311,7 +311,7 @@ static void ot_fini_sync(struct ot_context *sop) ot_kfree(sop->test, sop, sizeof(*sop)); } -struct { +static struct { struct ot_context * (*init)(struct ot_test *oc); void (*fini)(struct ot_context *sop); } g_ot_sync_ops[] = { @@ -475,7 +475,7 @@ static struct ot_context *ot_init_async_m0(struct ot_test *test) return sop; } -struct { +static struct { struct ot_context * (*init)(struct ot_test *oc); void (*fini)(struct ot_context *sop); } g_ot_async_ops[] = { @@ -632,7 +632,7 @@ static int ot_start_async(struct ot_test *test) #define NODE_COMPACT sizeof(struct ot_node) #define NODE_VMALLOC (512) -struct ot_test g_testcases[] = { +static struct ot_test g_testcases[] = { /* sync & normal */ {0, 0, NODE_COMPACT, 1000, 0, 1, 0, 0, "sync: percpu objpool"}, diff --git a/mm/memblock.c b/mm/memblock.c index fd492e5bbdbc..5a88d6d24d79 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -424,7 +424,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type, * of memory that aren't suitable for allocation */ if (!memblock_can_resize) - return -1; + panic("memblock: cannot resize %s array\n", type->name); /* Calculate new doubled size */ old_size = type->max * sizeof(struct memblock_region); diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 0841f8d82419..c9fdcc5cdce1 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -503,9 +503,8 @@ out: * architecture dependent calling conventions. 7+ can be supported in the * future. 
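The lib/closure.c hunk above weakens the refcount decrement from a full barrier (atomic_dec_return()) to release ordering, restoring acquire semantics on the zero path with smp_acquire__after_ctrl_dep(). The same shape in portable C11, with an explicit acquire fence instead of the control-dependency trick and a hypothetical obj type:

#include <stdatomic.h>

struct obj {
        atomic_int remaining;
        void (*destructor)(struct obj *);
};

static void obj_put(struct obj *o)
{
        /* Release: our prior writes to *o are visible to whichever
         * thread ends up performing the final put.
         */
        if (atomic_fetch_sub_explicit(&o->remaining, 1,
                                      memory_order_release) == 1) {
                /* Acquire: see every other thread's writes before the
                 * destructor runs; closure.c gets this from
                 * smp_acquire__after_ctrl_dep() on the zero branch.
                 */
                atomic_thread_fence(memory_order_acquire);
                o->destructor(o);
        }
}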
*/ -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); + __bpf_kfunc int bpf_fentry_test1(int a) { return a + 1; @@ -605,7 +604,7 @@ __bpf_kfunc void bpf_kfunc_call_memb_release(struct prog_test_member *p) { } -__diag_pop(); +__bpf_kfunc_end_defs(); BTF_SET8_START(bpf_test_modify_return_ids) BTF_ID_FLAGS(func, bpf_modify_return_test) diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 8f19253024b0..741360219552 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -135,3 +135,4 @@ static void __exit ebtable_broute_fini(void) module_init(ebtable_broute_init); module_exit(ebtable_broute_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Force packets to be routed instead of bridged"); diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 278f324e6752..dacd81b12e62 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -116,3 +116,4 @@ static void __exit ebtable_filter_fini(void) module_init(ebtable_filter_init); module_exit(ebtable_filter_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ebtables legacy filter table"); diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 9066f7f376d5..0f2a8c6118d4 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -116,3 +116,4 @@ static void __exit ebtable_nat_fini(void) module_init(ebtable_nat_init); module_exit(ebtable_nat_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ebtables legacy stateless nat table"); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index aa23479b20b2..99d82676f780 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2595,3 +2595,4 @@ EXPORT_SYMBOL(ebt_do_table); module_init(ebtables_init); module_exit(ebtables_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ebtables legacy core"); diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 71056ee84773..b5c406a6e765 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -416,3 +416,4 @@ module_exit(nf_conntrack_l3proto_bridge_fini); MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE)); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Bridge IPv4 and IPv6 connection tracking"); diff --git a/net/core/filter.c b/net/core/filter.c index 21d75108c2e9..383f96b0a1c7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11767,9 +11767,7 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id) return func; } -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc int bpf_dynptr_from_skb(struct sk_buff *skb, u64 flags, struct bpf_dynptr_kern *ptr__uninit) { @@ -11816,7 +11814,7 @@ __bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, return 0; } -__diag_pop(); +__bpf_kfunc_end_defs(); int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, struct bpf_dynptr_kern *ptr__uninit) @@ -11879,10 +11877,7 @@ static int __init bpf_kfunc_init(void) } late_initcall(bpf_kfunc_init); -/* Disables missing prototype warnings */ -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); 
+__bpf_kfunc_start_defs(); /* bpf_sock_destroy: Destroy the given socket with ECONNABORTED error code. * @@ -11916,7 +11911,7 @@ __bpf_kfunc int bpf_sock_destroy(struct sock_common *sock) return sk->sk_prot->diag_destroy(sk, ECONNABORTED); } -__diag_pop() +__bpf_kfunc_end_defs(); BTF_SET8_START(bpf_sk_iter_kfunc_ids) BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 5e409b98aba0..dec544337236 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -217,8 +217,12 @@ static int page_pool_init(struct page_pool *pool, return -ENOMEM; #endif - if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) + if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) { +#ifdef CONFIG_PAGE_POOL_STATS + free_percpu(pool->recycle_stats); +#endif return -ENOMEM; + } atomic_set(&pool->pages_state_release_cnt, 0); diff --git a/net/core/xdp.c b/net/core/xdp.c index df4789ab512d..b6f1d6dab3f2 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -696,9 +696,7 @@ struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf) return nxdpf; } -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); /** * bpf_xdp_metadata_rx_timestamp - Read XDP frame RX timestamp. @@ -738,7 +736,7 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash, return -EOPNOTSUPP; } -__diag_pop(); +__bpf_kfunc_end_defs(); BTF_SET8_START(xdp_metadata_kfunc_ids) #define XDP_METADATA_KFUNC(_, __, name, ___) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 1b8cbfda6e5d..44b033fe1ef6 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -629,9 +629,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (dccp_parse_options(sk, dreq, skb)) goto drop_and_free; - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; - ireq = inet_rsk(req); sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); @@ -639,6 +636,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq->ireq_family = AF_INET; ireq->ir_iif = READ_ONCE(sk->sk_bound_dev_if); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + /* * Step 3: Process LISTEN state * diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 8d344b219f84..4550b680665a 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -360,15 +360,15 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (dccp_parse_options(sk, dreq, skb)) goto drop_and_free; - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; - ireq = inet_rsk(req); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; ireq->ireq_family = AF_INET6; ireq->ir_mark = inet_request_mark(sk, skb); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index 9cbae0169249..788dfdc498a9 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -15,7 +15,7 @@ const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_FN_ATTR_ [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY, }, [DEVLINK_PORT_FN_ATTR_STATE] = NLA_POLICY_MAX(NLA_U8, 1), [DEVLINK_PORT_FN_ATTR_OPSTATE] = NLA_POLICY_MAX(NLA_U8, 
1), - [DEVLINK_PORT_FN_ATTR_CAPS] = NLA_POLICY_BITFIELD32(3), + [DEVLINK_PORT_FN_ATTR_CAPS] = NLA_POLICY_BITFIELD32(15), }; const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1] = { diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index b71dab630a87..80cdc6f6b34c 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -342,9 +342,7 @@ struct sk_buff *prp_create_tagged_frame(struct hsr_frame_info *frame, skb = skb_copy_expand(frame->skb_std, 0, skb_tailroom(frame->skb_std) + HSR_HLEN, GFP_ATOMIC); - prp_fill_rct(skb, frame, port); - - return skb; + return prp_fill_rct(skb, frame, port); } static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev, diff --git a/net/ipv4/fou_bpf.c b/net/ipv4/fou_bpf.c index 3760a14b6b57..4da03bf45c9b 100644 --- a/net/ipv4/fou_bpf.c +++ b/net/ipv4/fou_bpf.c @@ -22,9 +22,7 @@ enum bpf_fou_encap_type { FOU_BPF_ENCAP_GUE, }; -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in BTF"); +__bpf_kfunc_start_defs(); /* bpf_skb_set_fou_encap - Set FOU encap parameters * @@ -100,7 +98,7 @@ __bpf_kfunc int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx, return 0; } -__diag_pop() +__bpf_kfunc_end_defs(); BTF_SET8_START(fou_kfunc_set) BTF_ID_FLAGS(func, bpf_skb_set_fou_encap) diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 56f6ecc43451..4d42d0756fd7 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -170,3 +170,4 @@ module_init(iptable_nat_init); module_exit(iptable_nat_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("iptables legacy nat table"); diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index ca5e5b21587c..0e7f53964d0a 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -108,3 +108,4 @@ static void __exit iptable_raw_fini(void) module_init(iptable_raw_init); module_exit(iptable_raw_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("iptables legacy raw table"); diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 265b39bc435b..482e733c3375 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -186,3 +186,4 @@ module_init(nf_defrag_init); module_exit(nf_defrag_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv4 defragmentation support"); diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index f33aeab9424f..f01b038fc1cd 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -336,3 +336,4 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook) EXPORT_SYMBOL_GPL(nf_send_unreach); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv4 packet rejection core"); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 98b25e5d147b..d37282c06e3d 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -306,7 +306,7 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, treq->af_specific = af_ops; treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; - treq->req_usec_ts = -1; + treq->req_usec_ts = false; #if IS_ENABLED(CONFIG_MPTCP) treq->is_mptcp = sk_is_mptcp(sk); diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index ef5472ed6158..7696417d0640 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -1315,7 +1315,8 @@ static int tcp_ao_parse_crypto(struct tcp_ao_add *cmd, struct tcp_ao_key *key) 
key->maclen = cmd->maclen ?: 12; /* 12 is the default in RFC5925 */ /* Check: maclen + tcp-ao header <= (MAX_TCP_OPTION_SPACE - mss - * - tstamp - wscale - sackperm), + * - tstamp (including sackperm) + * - wscale), * see tcp_syn_options(), tcp_synack_options(), commit 33ad798c924b. * * In order to allow D-SACK with TCP-AO, the header size should be: @@ -1342,9 +1343,9 @@ static int tcp_ao_parse_crypto(struct tcp_ao_add *cmd, struct tcp_ao_key *key) * large to leave sufficient option space. */ syn_tcp_option_space = MAX_TCP_OPTION_SPACE; + syn_tcp_option_space -= TCPOLEN_MSS_ALIGNED; syn_tcp_option_space -= TCPOLEN_TSTAMP_ALIGNED; syn_tcp_option_space -= TCPOLEN_WSCALE_ALIGNED; - syn_tcp_option_space -= TCPOLEN_SACKPERM_ALIGNED; if (tcp_ao_len(key) > syn_tcp_option_space) { err = -EMSGSIZE; goto err_kfree; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 50aaa1527150..bcb55d98004c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -7115,7 +7115,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, req->syncookie = want_cookie; tcp_rsk(req)->af_specific = af_ops; tcp_rsk(req)->ts_off = 0; - tcp_rsk(req)->req_usec_ts = -1; + tcp_rsk(req)->req_usec_ts = false; #if IS_ENABLED(CONFIG_MPTCP) tcp_rsk(req)->is_mptcp = 0; #endif @@ -7143,9 +7143,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (!dst) goto drop_and_free; - if (tmp_opt.tstamp_ok) + if (tmp_opt.tstamp_ok) { + tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst); tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb); - + } if (!want_cookie && !isn) { int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f558c054cf6e..eb13a55d660c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -601,6 +601,44 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb, } #endif +static __be32 *process_tcp_ao_options(struct tcp_sock *tp, + const struct tcp_request_sock *tcprsk, + struct tcp_out_options *opts, + struct tcp_key *key, __be32 *ptr) +{ +#ifdef CONFIG_TCP_AO + u8 maclen = tcp_ao_maclen(key->ao_key); + + if (tcprsk) { + u8 aolen = maclen + sizeof(struct tcp_ao_hdr); + + *ptr++ = htonl((TCPOPT_AO << 24) | (aolen << 16) | + (tcprsk->ao_keyid << 8) | + (tcprsk->ao_rcv_next)); + } else { + struct tcp_ao_key *rnext_key; + struct tcp_ao_info *ao_info; + + ao_info = rcu_dereference_check(tp->ao_info, + lockdep_sock_is_held(&tp->inet_conn.icsk_inet.sk)); + rnext_key = READ_ONCE(ao_info->rnext_key); + if (WARN_ON_ONCE(!rnext_key)) + return ptr; + *ptr++ = htonl((TCPOPT_AO << 24) | + (tcp_ao_len(key->ao_key) << 16) | + (key->ao_key->sndid << 8) | + (rnext_key->rcvid)); + } + opts->hash_location = (__u8 *)ptr; + ptr += maclen / sizeof(*ptr); + if (unlikely(maclen % sizeof(*ptr))) { + memset(ptr, TCPOPT_NOP, sizeof(*ptr)); + ptr++; + } +#endif + return ptr; +} + /* Write previously computed TCP options to the packet. 
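For reference, the SYN option-space check above works out as follows, with constant values assumed from the current tcp.h definitions (shown as a plain computation, not kernel code):

#include <stdio.h>

int main(void)
{
        const int max_tcp_option_space = 40; /* MAX_TCP_OPTION_SPACE */
        const int mss = 4;                   /* TCPOLEN_MSS_ALIGNED */
        const int tstamp = 12;               /* TCPOLEN_TSTAMP_ALIGNED; SACK-perm rides along */
        const int wscale = 4;                /* TCPOLEN_WSCALE_ALIGNED */

        /* 40 - 4 - 12 - 4 = 20 bytes remain for the TCP-AO option on a
         * SYN; tcp_ao_parse_crypto() rejects larger keys with -EMSGSIZE.
         */
        printf("space for TCP-AO: %d\n",
               max_tcp_option_space - mss - tstamp - wscale);
        return 0;
}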
* * Beware: Something in the Internet is very sensitive to the ordering of @@ -629,37 +667,7 @@ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp, opts->hash_location = (__u8 *)ptr; ptr += 4; } else if (tcp_key_is_ao(key)) { -#ifdef CONFIG_TCP_AO - u8 maclen = tcp_ao_maclen(key->ao_key); - - if (tcprsk) { - u8 aolen = maclen + sizeof(struct tcp_ao_hdr); - - *ptr++ = htonl((TCPOPT_AO << 24) | (aolen << 16) | - (tcprsk->ao_keyid << 8) | - (tcprsk->ao_rcv_next)); - } else { - struct tcp_ao_key *rnext_key; - struct tcp_ao_info *ao_info; - - ao_info = rcu_dereference_check(tp->ao_info, - lockdep_sock_is_held(&tp->inet_conn.icsk_inet.sk)); - rnext_key = READ_ONCE(ao_info->rnext_key); - if (WARN_ON_ONCE(!rnext_key)) - goto out_ao; - *ptr++ = htonl((TCPOPT_AO << 24) | - (tcp_ao_len(key->ao_key) << 16) | - (key->ao_key->sndid << 8) | - (rnext_key->rcvid)); - } - opts->hash_location = (__u8 *)ptr; - ptr += maclen / sizeof(*ptr); - if (unlikely(maclen % sizeof(*ptr))) { - memset(ptr, TCPOPT_NOP, sizeof(*ptr)); - ptr++; - } -out_ao: -#endif + ptr = process_tcp_ao_options(tp, tcprsk, opts, key, ptr); } if (unlikely(opts->mss)) { *ptr++ = htonl((TCPOPT_MSS << 24) | @@ -3693,8 +3701,6 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, mss = tcp_mss_clamp(tp, dst_metric_advmss(dst)); memset(&opts, 0, sizeof(opts)); - if (tcp_rsk(req)->req_usec_ts < 0) - tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst); now = tcp_clock_ns(); #ifdef CONFIG_SYN_COOKIES if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok)) diff --git a/net/ipv4/tcp_sigpool.c b/net/ipv4/tcp_sigpool.c index 65a8eaae2fec..55b310a722c7 100644 --- a/net/ipv4/tcp_sigpool.c +++ b/net/ipv4/tcp_sigpool.c @@ -231,7 +231,7 @@ static void cpool_schedule_cleanup(struct kref *kref) */ void tcp_sigpool_release(unsigned int id) { - if (WARN_ON_ONCE(id > cpool_populated || !cpool[id].alg)) + if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg)) return; /* slow-path */ @@ -245,7 +245,7 @@ EXPORT_SYMBOL_GPL(tcp_sigpool_release); */ void tcp_sigpool_get(unsigned int id) { - if (WARN_ON_ONCE(id > cpool_populated || !cpool[id].alg)) + if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg)) return; kref_get(&cpool[id].kref); } @@ -256,7 +256,7 @@ int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c) __cond_acquires(RC struct crypto_ahash *hash; rcu_read_lock_bh(); - if (WARN_ON_ONCE(id > cpool_populated || !cpool[id].alg)) { + if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg)) { rcu_read_unlock_bh(); return -EINVAL; } @@ -301,7 +301,7 @@ EXPORT_SYMBOL_GPL(tcp_sigpool_end); */ size_t tcp_sigpool_algo(unsigned int id, char *buf, size_t buf_len) { - if (WARN_ON_ONCE(id > cpool_populated || !cpool[id].alg)) + if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg)) return -EINVAL; return strscpy(buf, cpool[id].alg, buf_len); diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index bf3cb3a13600..52cf104e3478 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -170,3 +170,4 @@ module_init(ip6table_nat_init); module_exit(ip6table_nat_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Ip6tables legacy nat table"); diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 08861d5d1f4d..fc9f6754028f 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -106,3 +106,4 @@ static void __exit ip6table_raw_fini(void) module_init(ip6table_raw_init); 
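The tcp_sigpool bound fixes above are the classic off-by-one: cpool_populated counts initialized slots, so valid indices are 0..cpool_populated-1 and the guard must use >=. A minimal standalone illustration (hypothetical pool, not the kernel code):

#include <stdbool.h>
#include <stddef.h>

#define POOL_MAX 16

static const char *pool_alg[POOL_MAX];
static size_t pool_populated;  /* number of initialized slots */

static bool pool_id_valid(size_t id)
{
        /* "id > pool_populated" wrongly accepted id == pool_populated,
         * one past the last initialized slot; ">=" is the right bound.
         */
        return id < pool_populated && pool_alg[id] != NULL;
}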
module_exit(ip6table_raw_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Ip6tables legacy raw table"); diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index d59b296b4f51..be7817fbc024 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -182,3 +182,4 @@ module_init(nf_defrag_init); module_exit(nf_defrag_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv6 defragmentation support"); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 58ccdb08c0fd..d45bc54b7ea5 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -413,3 +413,4 @@ void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, EXPORT_SYMBOL_GPL(nf_send_unreach6); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv6 packet rejection core"); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 500f6ed3b8cf..12eedc6ca2cc 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -181,14 +181,15 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq = tcp_rsk(req); treq->tfo_listener = false; - if (security_inet_conn_request(sk, skb, req)) - goto out_free; - req->mss = mss; ireq->ir_rmt_port = th->source; ireq->ir_num = ntohs(th->dest); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; + + if (security_inet_conn_request(sk, skb, req)) + goto out_free; + if (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index dd1d8ffd5f59..65d1f6755f98 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1946,4 +1946,5 @@ module_init(kcm_init); module_exit(kcm_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KCM (Kernel Connection Multiplexor) sockets"); MODULE_ALIAS_NETPROTO(PF_KCM); diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 7cac441862e2..51bccfb00a9c 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -127,8 +127,14 @@ static inline int llc_fixup_skb(struct sk_buff *skb) skb->transport_header += llc_len; skb_pull(skb, llc_len); if (skb->protocol == htons(ETH_P_802_2)) { - __be16 pdulen = eth_hdr(skb)->h_proto; - s32 data_size = ntohs(pdulen) - llc_len; + __be16 pdulen; + s32 data_size; + + if (skb->mac_len < ETH_HLEN) + return 0; + + pdulen = eth_hdr(skb)->h_proto; + data_size = ntohs(pdulen) - llc_len; if (data_size < 0 || !pskb_may_pull(skb, data_size)) diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index 79d1cef8f15a..06fb8e6944b0 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -153,6 +153,9 @@ int llc_sap_action_send_test_r(struct llc_sap *sap, struct sk_buff *skb) int rc = 1; u32 data_size; + if (skb->mac_len < ETH_HLEN) + return 1; + llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_da(skb, mac_sa); llc_pdu_decode_ssap(skb, &dsap); diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 05c6ae092053..f50654292510 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -76,6 +76,9 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb) u32 data_size; struct sk_buff *nskb; + if (skb->mac_len < ETH_HLEN) + goto out; + /* The test request command is type U (llc_len = 3) */ data_size = ntohs(eth_hdr(skb)->h_proto) - 3; nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U, data_size); diff --git a/net/netfilter/ipvs/ip_vs_core.c 
b/net/netfilter/ipvs/ip_vs_core.c index 3230506ae3ff..a2c16b501087 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -2450,3 +2450,4 @@ static void __exit ip_vs_cleanup(void) module_init(ip_vs_init); module_exit(ip_vs_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IP Virtual Server"); diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index 5e6ec32aff2b..75f4c231f4a0 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -270,3 +270,4 @@ static void __exit ip_vs_dh_cleanup(void) module_init(ip_vs_dh_init); module_exit(ip_vs_dh_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs destination hashing scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_fo.c b/net/netfilter/ipvs/ip_vs_fo.c index b846cc385279..ab117e5bc34e 100644 --- a/net/netfilter/ipvs/ip_vs_fo.c +++ b/net/netfilter/ipvs/ip_vs_fo.c @@ -72,3 +72,4 @@ static void __exit ip_vs_fo_cleanup(void) module_init(ip_vs_fo_init); module_exit(ip_vs_fo_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs weighted failover scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index ef1f45e43b63..f53899d12416 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -635,3 +635,4 @@ static void __exit ip_vs_ftp_exit(void) module_init(ip_vs_ftp_init); module_exit(ip_vs_ftp_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs ftp helper"); diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index cf78ba4ce5ff..8ceec7a2fa8f 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -632,3 +632,4 @@ static void __exit ip_vs_lblc_cleanup(void) module_init(ip_vs_lblc_init); module_exit(ip_vs_lblc_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs locality-based least-connection scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 9eddf118b40e..0fb64707213f 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -817,3 +817,4 @@ static void __exit ip_vs_lblcr_cleanup(void) module_init(ip_vs_lblcr_init); module_exit(ip_vs_lblcr_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs locality-based least-connection with replication scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index 9d34d81fc6f1..c2764505e380 100644 --- a/net/netfilter/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c @@ -86,3 +86,4 @@ static void __exit ip_vs_lc_cleanup(void) module_init(ip_vs_lc_init); module_exit(ip_vs_lc_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs least connection scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index f56862a87518..ed7f5c889b41 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -136,3 +136,4 @@ static void __exit ip_vs_nq_cleanup(void) module_init(ip_vs_nq_init); module_exit(ip_vs_nq_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs never queue scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_ovf.c b/net/netfilter/ipvs/ip_vs_ovf.c index c03066fdd5ca..c7708b809700 100644 --- a/net/netfilter/ipvs/ip_vs_ovf.c +++ b/net/netfilter/ipvs/ip_vs_ovf.c @@ -79,3 +79,4 @@ static void __exit ip_vs_ovf_cleanup(void) module_init(ip_vs_ovf_init); module_exit(ip_vs_ovf_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs overflow connection scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c 
b/net/netfilter/ipvs/ip_vs_pe_sip.c index 0ac6705a61d3..e4ce1d9a63f9 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -185,3 +185,4 @@ static void __exit ip_vs_sip_cleanup(void) module_init(ip_vs_sip_init); module_exit(ip_vs_sip_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs sip helper"); diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index 38495c6f6c7c..6baa34dff9f0 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -122,4 +122,5 @@ static void __exit ip_vs_rr_cleanup(void) module_init(ip_vs_rr_init); module_exit(ip_vs_rr_cleanup); +MODULE_DESCRIPTION("ipvs round-robin scheduler"); MODULE_LICENSE("GPL"); diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index 7663288e5358..a46f99a56618 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -137,3 +137,4 @@ static void __exit ip_vs_sed_cleanup(void) module_init(ip_vs_sed_init); module_exit(ip_vs_sed_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs shortest expected delay scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index c2028e412092..92e77d7a6b50 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -376,3 +376,4 @@ static void __exit ip_vs_sh_cleanup(void) module_init(ip_vs_sh_init); module_exit(ip_vs_sh_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs source hashing scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_twos.c b/net/netfilter/ipvs/ip_vs_twos.c index 3308e4cc740a..8d5419edde50 100644 --- a/net/netfilter/ipvs/ip_vs_twos.c +++ b/net/netfilter/ipvs/ip_vs_twos.c @@ -137,3 +137,4 @@ static void __exit ip_vs_twos_cleanup(void) module_init(ip_vs_twos_init); module_exit(ip_vs_twos_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs power of twos choice scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index 09f584b564a0..9fa500927c0a 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -109,3 +109,4 @@ static void __exit ip_vs_wlc_cleanup(void) module_init(ip_vs_wlc_init); module_exit(ip_vs_wlc_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs weighted least connection scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 1bc7a0789d85..85ce0d04afac 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -263,3 +263,4 @@ static void __exit ip_vs_wrr_cleanup(void) module_init(ip_vs_wrr_init); module_exit(ip_vs_wrr_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs weighted round-robin scheduler"); diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index b21799d468d2..475358ec8212 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -230,9 +230,7 @@ static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log, return 0; } -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in nf_conntrack BTF"); +__bpf_kfunc_start_defs(); /* bpf_xdp_ct_alloc - Allocate a new CT entry * @@ -467,7 +465,7 @@ __bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status) return nf_ct_change_status_common(nfct, status); } -__diag_pop() +__bpf_kfunc_end_defs(); BTF_SET8_START(nf_ct_kfunc_set) BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL) diff --git a/net/netfilter/nf_conntrack_broadcast.c 
b/net/netfilter/nf_conntrack_broadcast.c index 9fb9b8031298..cfa0fe0356de 100644 --- a/net/netfilter/nf_conntrack_broadcast.c +++ b/net/netfilter/nf_conntrack_broadcast.c @@ -82,3 +82,4 @@ out: EXPORT_SYMBOL_GPL(nf_conntrack_broadcast_help); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Broadcast connection tracking helper"); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 334db22199c1..fb0ae15e96df 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -57,6 +57,7 @@ #include "nf_internals.h" MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("List and change connection tracking table"); struct ctnetlink_list_dump_ctx { struct nf_conn *last; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index c928ff63b10e..f36727ed91e1 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -699,3 +699,4 @@ MODULE_ALIAS("ip_conntrack"); MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET)); MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6)); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv4 and IPv6 connection tracking"); diff --git a/net/netfilter/nf_nat_bpf.c b/net/netfilter/nf_nat_bpf.c index 141ee7783223..6e3b2f58855f 100644 --- a/net/netfilter/nf_nat_bpf.c +++ b/net/netfilter/nf_nat_bpf.c @@ -12,9 +12,7 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_nat.h> -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in nf_nat BTF"); +__bpf_kfunc_start_defs(); /* bpf_ct_set_nat_info - Set source or destination nat address * @@ -54,7 +52,7 @@ __bpf_kfunc int bpf_ct_set_nat_info(struct nf_conn___init *nfct, return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? 
-ENOMEM : 0; } -__diag_pop() +__bpf_kfunc_end_defs(); BTF_SET8_START(nf_nat_kfunc_set) BTF_ID_FLAGS(func, bpf_ct_set_nat_info, KF_TRUSTED_ARGS) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index c4e0516a8dfa..c3d7ecbc777c 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -1263,6 +1263,7 @@ static void __exit nf_nat_cleanup(void) } MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Network address translation core"); module_init(nf_nat_init); module_exit(nf_nat_cleanup); diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c index 6616ba5d0b04..5b37487d9d11 100644 --- a/net/netfilter/nf_nat_redirect.c +++ b/net/netfilter/nf_nat_redirect.c @@ -80,6 +80,26 @@ EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv4); static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; +static bool nf_nat_redirect_ipv6_usable(const struct inet6_ifaddr *ifa, unsigned int scope) +{ + unsigned int ifa_addr_type = ipv6_addr_type(&ifa->addr); + + if (ifa_addr_type & IPV6_ADDR_MAPPED) + return false; + + if ((ifa->flags & IFA_F_TENTATIVE) && (!(ifa->flags & IFA_F_OPTIMISTIC))) + return false; + + if (scope) { + unsigned int ifa_scope = ifa_addr_type & IPV6_ADDR_SCOPE_MASK; + + if (!(scope & ifa_scope)) + return false; + } + + return true; +} + unsigned int nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, unsigned int hooknum) @@ -89,14 +109,19 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, if (hooknum == NF_INET_LOCAL_OUT) { newdst.in6 = loopback_addr; } else { + unsigned int scope = ipv6_addr_scope(&ipv6_hdr(skb)->daddr); struct inet6_dev *idev; - struct inet6_ifaddr *ifa; bool addr = false; idev = __in6_dev_get(skb->dev); if (idev != NULL) { + const struct inet6_ifaddr *ifa; + read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { + if (!nf_nat_redirect_ipv6_usable(ifa, scope)) + continue; + newdst.in6 = ifa->addr; addr = true; break; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3c1fd8283bf4..a761ee6796f6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -6520,6 +6520,12 @@ static int nft_setelem_deactivate(const struct net *net, return ret; } +static void nft_setelem_catchall_destroy(struct nft_set_elem_catchall *catchall) +{ + list_del_rcu(&catchall->list); + kfree_rcu(catchall, rcu); +} + static void nft_setelem_catchall_remove(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv) @@ -6528,8 +6534,7 @@ static void nft_setelem_catchall_remove(const struct net *net, list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { if (catchall->elem == elem_priv) { - list_del_rcu(&catchall->list); - kfree_rcu(catchall, rcu); + nft_setelem_catchall_destroy(catchall); break; } } @@ -9678,11 +9683,12 @@ static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc, unsigned int gc_seq, bool sync) { - struct nft_set_elem_catchall *catchall; + struct nft_set_elem_catchall *catchall, *next; const struct nft_set *set = gc->set; + struct nft_elem_priv *elem_priv; struct nft_set_ext *ext; - list_for_each_entry_rcu(catchall, &set->catchall_list, list) { + list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); if (!nft_set_elem_expired(ext)) @@ -9700,7 +9706,13 @@ dead_elem: if (!gc) return NULL; - nft_trans_gc_elem_add(gc, catchall->elem); + elem_priv = catchall->elem; + if (sync) { + 
nft_setelem_data_deactivate(gc->net, gc->set, elem_priv); + nft_setelem_catchall_destroy(catchall); + } + + nft_trans_gc_elem_add(gc, elem_priv); } return gc; @@ -11386,4 +11398,5 @@ module_exit(nf_tables_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_DESCRIPTION("Framework for packet filtering and classification"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFTABLES); diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 50723ba08289..c0fc431991e8 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -447,4 +447,5 @@ module_init(nfnl_osf_init); module_exit(nfnl_osf_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Passive OS fingerprint matching"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF); diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c index 98e4946100c5..40e230d8b712 100644 --- a/net/netfilter/nft_chain_nat.c +++ b/net/netfilter/nft_chain_nat.c @@ -137,6 +137,7 @@ module_init(nft_chain_nat_init); module_exit(nft_chain_nat_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("nftables network address translation support"); #ifdef CONFIG_NF_TABLES_IPV4 MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat"); #endif diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index 04b51f285332..1bfe258018da 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -204,4 +204,5 @@ bool nft_fib_reduce(struct nft_regs_track *track, EXPORT_SYMBOL_GPL(nft_fib_reduce); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Query routing table from nftables"); MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index a5268e6dd32f..358e742afad7 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -270,4 +270,5 @@ module_exit(nft_fwd_netdev_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_DESCRIPTION("nftables netdev packet forwarding support"); MODULE_ALIAS_NFT_AF_EXPR(5, "fwd"); diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 7ddb9a78e3fc..ef93e0d3bee0 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -561,7 +561,7 @@ recent_mt_proc_write(struct file *file, const char __user *input, { struct recent_table *t = pde_data(file_inode(file)); struct recent_entry *e; - char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")]; + char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:255.255.255.255")]; const char *c = buf; union nf_inet_addr addr = {}; u_int16_t family; diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 9c4f231be275..1eeff9422856 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -257,5 +257,6 @@ static void __exit netlink_diag_exit(void) module_init(netlink_diag_init); module_exit(netlink_diag_exit); +MODULE_DESCRIPTION("Netlink-based socket monitoring/diagnostic interface (sock_diag)"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 16 /* AF_NETLINK */); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 0b9a785dea45..3019a4406ca4 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -985,7 +985,7 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, if (err) return err; - nf_conn_act_ct_ext_add(ct); + nf_conn_act_ct_ext_add(skb, ct, ctinfo); } else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && labels_nonzero(&info->labels.mask)) { err = 
ovs_ct_set_labels(ct, key, &info->labels.value, diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index ac85d4644a3c..df8a271948a1 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -212,7 +212,7 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) conn->idle_timestamp = jiffies; if (atomic_dec_and_test(&conn->active)) rxrpc_set_service_reap_timer(conn->rxnet, - jiffies + rxrpc_connection_expiry); + jiffies + rxrpc_connection_expiry * HZ); } rxrpc_put_call(call, rxrpc_call_put_io_thread); diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 7d910aee4f8c..c553a30e9c83 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -87,7 +87,7 @@ static void rxrpc_client_conn_reap_timeout(struct timer_list *timer) struct rxrpc_local *local = container_of(timer, struct rxrpc_local, client_conn_reap_timer); - if (local->kill_all_client_conns && + if (!local->kill_all_client_conns && test_and_set_bit(RXRPC_CLIENT_CONN_REAP_TIMER, &local->client_conn_flags)) rxrpc_wake_up_io_thread(local); } diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9d3f26bf0440..c39252d61ebb 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1098,7 +1098,7 @@ repeat: } } else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) { if (unlikely(!rcu_access_pointer(a->goto_chain))) { - net_warn_ratelimited("can't go to NULL chain!\n"); + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; } tcf_action_goto_chain_exec(a, res); diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 9583645e86c2..0db0ecf1d110 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -376,6 +376,17 @@ static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry, entry->tuplehash[dir].tuple.tc.iifidx = act_ct_ext->ifindex[dir]; } +static void tcf_ct_flow_ct_ext_ifidx_update(struct flow_offload *entry) +{ + struct nf_conn_act_ct_ext *act_ct_ext; + + act_ct_ext = nf_conn_act_ct_ext_find(entry->ct); + if (act_ct_ext) { + tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_ORIGINAL); + tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_REPLY); + } +} + static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft, struct nf_conn *ct, bool tcp, bool bidirectional) @@ -671,6 +682,8 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, else ctinfo = IP_CT_ESTABLISHED_REPLY; + nf_conn_act_ct_ext_fill(skb, ct, ctinfo); + tcf_ct_flow_ct_ext_ifidx_update(flow); flow_offload_refresh(nf_ft, flow, force_refresh); if (!test_bit(IPS_ASSURED_BIT, &ct->status)) { /* Process this flow in SW to allow promoting to ASSURED */ @@ -1034,7 +1047,7 @@ do_nat: tcf_ct_act_set_labels(ct, p->labels, p->labels_mask); if (!nf_ct_is_confirmed(ct)) - nf_conn_act_ct_ext_add(ct); + nf_conn_act_ct_ext_add(skb, ct, ctinfo); /* This will take care of sending queued events * even if the connection is already confirmed. 
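The rxrpc_disconnect_call() change above is a units fix rather than a behavioural tweak: rxrpc_connection_expiry is kept in seconds, while timers are armed in jiffies, so the missing multiplication by HZ let the service-connection reaper fire far too early. A userspace sketch of the arithmetic, assuming HZ of 250 and the ten-minute default purely for illustration:

#include <stdio.h>

#define HZ 250                                  /* assumed tick rate */

static const unsigned long rxrpc_connection_expiry = 10 * 60;   /* seconds */

int main(void)
{
        unsigned long jiffies = 1000000;        /* pretend current tick count */
        unsigned long buggy = jiffies + rxrpc_connection_expiry;
        unsigned long fixed = jiffies + rxrpc_connection_expiry * HZ;

        printf("buggy reap delay: %lu s\n", (buggy - jiffies) / HZ);  /* 2 */
        printf("fixed reap delay: %lu s\n", (fixed - jiffies) / HZ);  /* 600 */
        return 0;
}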
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index c9a811f4c7ee..393b78729216 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -677,4 +677,5 @@ static void __exit gate_cleanup_module(void) module_init(gate_init_module); module_exit(gate_cleanup_module); +MODULE_DESCRIPTION("TC gate action"); MODULE_LICENSE("GPL v2"); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 1daeb2182b70..1976bd163986 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1658,6 +1658,7 @@ static inline int __tcf_classify(struct sk_buff *skb, int act_index, u32 *last_executed_chain) { + u32 orig_reason = res->drop_reason; #ifdef CONFIG_NET_CLS_ACT const int max_reclassify_loop = 16; const struct tcf_proto *first_tp; @@ -1712,8 +1713,14 @@ reclassify: goto reset; } #endif - if (err >= 0) + if (err >= 0) { + /* Policy drop or drop reason is over-written by + * classifiers with a bogus value(0) */ + if (err == TC_ACT_SHOT && + res->drop_reason == SKB_NOT_DROPPED_YET) + tcf_set_drop_reason(res, orig_reason); return err; + } } if (unlikely(n)) { diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 1b92c33b5f81..a1f56931330c 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -341,4 +341,5 @@ static void __exit exit_basic(void) module_init(init_basic) module_exit(exit_basic) +MODULE_DESCRIPTION("TC basic classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index bd9322d71910..7ee8dbf49ed0 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -222,4 +222,5 @@ static void __exit exit_cgroup_cls(void) module_init(init_cgroup_cls); module_exit(exit_cgroup_cls); +MODULE_DESCRIPTION("TC cgroup classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index c49d6af0e048..afc534ee0a18 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -446,4 +446,5 @@ static void __exit exit_fw(void) module_init(init_fw) module_exit(exit_fw) +MODULE_DESCRIPTION("SKB mark based TC classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 1424bfeaca73..12a505db4183 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -684,4 +684,5 @@ static void __exit exit_route4(void) module_init(init_route4) module_exit(exit_route4) +MODULE_DESCRIPTION("Routing table realm based TC classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 6663e971a13e..d5bdfd4a7655 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -1489,4 +1489,5 @@ static void __exit exit_u32(void) module_init(init_u32) module_exit(exit_u32) +MODULE_DESCRIPTION("Universal 32bit based TC Classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index cac870eb7897..9a0b85190a2c 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -574,3 +574,4 @@ static void __exit cbs_module_exit(void) module_init(cbs_module_init) module_exit(cbs_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Credit Based shaper"); diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 19c851125901..ae1da08e268f 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -513,3 +513,4 @@ module_init(choke_module_init) module_exit(choke_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Choose and keep responsive flows scheduler"); diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 19901e77cd3b..097740a9afea 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ 
-495,3 +495,4 @@ static void __exit drr_exit(void) module_init(drr_init); module_exit(drr_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Deficit Round Robin scheduler"); diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c index 61d1f0e32cf3..4808159a5466 100644 --- a/net/sched/sch_etf.c +++ b/net/sched/sch_etf.c @@ -513,3 +513,4 @@ static void __exit etf_module_exit(void) module_init(etf_module_init) module_exit(etf_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Earliest TxTime First (ETF) qdisc"); diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index b10efeaf0629..f7c88495946b 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -826,3 +826,4 @@ static void __exit ets_exit(void) module_init(ets_init); module_exit(ets_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Enhanced Transmission Selection(ETS) scheduler"); diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index e1040421b797..450f5c67ac49 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -269,3 +269,4 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, return q ? : ERR_PTR(err); } EXPORT_SYMBOL(fifo_create_dflt); +MODULE_DESCRIPTION("Single queue packet and byte based First In First Out(P/BFIFO) scheduler"); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 0fd18c344ab5..3a31c47fea9b 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -919,14 +919,8 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_TIMER_SLACK] = { .type = NLA_U32 }, [TCA_FQ_HORIZON] = { .type = NLA_U32 }, [TCA_FQ_HORIZON_DROP] = { .type = NLA_U8 }, - [TCA_FQ_PRIOMAP] = { - .type = NLA_BINARY, - .len = sizeof(struct tc_prio_qopt), - }, - [TCA_FQ_WEIGHTS] = { - .type = NLA_BINARY, - .len = FQ_BANDS * sizeof(s32), - }, + [TCA_FQ_PRIOMAP] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_prio_qopt)), + [TCA_FQ_WEIGHTS] = NLA_POLICY_EXACT_LEN(FQ_BANDS * sizeof(s32)), }; /* compress a u8 array with all elems <= 3 to an array of 2-bit fields */ diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 872d127c9db4..8c61eb3dc943 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -945,3 +945,4 @@ module_init(gred_module_init) module_exit(gred_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Generic Random Early Detection qdisc"); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 880c5f16b29c..16c45da4036a 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1693,5 +1693,6 @@ hfsc_cleanup(void) } MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Hierarchical Fair Service Curve scheduler"); module_init(hfsc_init); module_exit(hfsc_cleanup); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 0d947414e616..7349233eaa9b 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -2179,3 +2179,4 @@ static void __exit htb_module_exit(void) module_init(htb_module_init) module_exit(htb_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Hierarchical Token Bucket scheduler"); diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index a463a63192c3..5fa9eaa79bfc 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -370,3 +370,4 @@ module_exit(ingress_module_exit); MODULE_ALIAS("sch_clsact"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Ingress and clsact based ingress and egress qdiscs"); diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 793009f445c0..43e53ee00a56 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -789,3 +789,4 @@ 
module_init(mqprio_module_init); module_exit(mqprio_module_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Classful multiqueue prio qdisc"); diff --git a/net/sched/sch_mqprio_lib.c b/net/sched/sch_mqprio_lib.c index 83b3793c4012..b3a5572c167b 100644 --- a/net/sched/sch_mqprio_lib.c +++ b/net/sched/sch_mqprio_lib.c @@ -129,3 +129,4 @@ void mqprio_fp_to_offload(u32 fp[TC_QOPT_MAX_QUEUE], EXPORT_SYMBOL_GPL(mqprio_fp_to_offload); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Shared mqprio qdisc code currently between taprio and mqprio"); diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 75c9c860182b..d66d5f0ec080 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -410,3 +410,4 @@ module_init(multiq_module_init) module_exit(multiq_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Multi queue to hardware queue mapping qdisc"); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 6ba2dc191ed9..fa678eb88528 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -1307,3 +1307,4 @@ static void __exit netem_module_exit(void) module_init(netem_module_init) module_exit(netem_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Network characteristics emulator qdisc"); diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c index 35f49edf63db..992f0c8d7988 100644 --- a/net/sched/sch_plug.c +++ b/net/sched/sch_plug.c @@ -226,3 +226,4 @@ static void __exit plug_module_exit(void) module_init(plug_module_init) module_exit(plug_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Qdisc to plug and unplug traffic via netlink control"); diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index fdc5ef52c3ee..8ecdd3ef6f8e 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -433,3 +433,4 @@ module_init(prio_module_init) module_exit(prio_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Simple 3-band priority qdisc"); diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 28315166fe8e..48a604c320c7 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1535,3 +1535,4 @@ static void __exit qfq_exit(void) module_init(qfq_init); module_exit(qfq_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Quick Fair Queueing Plus qdisc"); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 16277b6a0238..607b6c8b3a9b 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -563,3 +563,4 @@ module_init(red_module_init) module_exit(red_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Random Early Detection qdisc"); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 66dcb18638fe..eb77558fa367 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -937,3 +937,4 @@ static void __exit sfq_module_exit(void) module_init(sfq_module_init) module_exit(sfq_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Stochastic Fairness qdisc"); diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c index 5df2dacb7b1a..28beb11762d8 100644 --- a/net/sched/sch_skbprio.c +++ b/net/sched/sch_skbprio.c @@ -307,3 +307,4 @@ module_init(skbprio_module_init) module_exit(skbprio_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SKB priority based scheduling qdisc"); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 2e1949de4171..31a8252bd09c 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -2572,3 +2572,4 @@ static void __exit taprio_module_exit(void) module_init(taprio_module_init); module_exit(taprio_module_exit); MODULE_LICENSE("GPL"); 
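A few hunks up, sch_fq tightens its netlink policy: NLA_BINARY with .len only caps the maximum payload size, so a truncated TCA_FQ_PRIOMAP or TCA_FQ_WEIGHTS attribute would be accepted even though the qdisc then reads a full structure out of the payload and runs past the attribute's end. NLA_POLICY_EXACT_LEN() rejects any other size up front. A userspace sketch of the two validation rules; the struct is a stand-in for struct tc_prio_qopt, not the real uapi definition:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct tc_prio_qopt_sketch {            /* stand-in, same general shape */
        int bands;
        unsigned char priomap[16];
};

static bool check_max_len(size_t attr_len)      /* old NLA_BINARY rule */
{
        return attr_len <= sizeof(struct tc_prio_qopt_sketch);
}

static bool check_exact_len(size_t attr_len)    /* NLA_POLICY_EXACT_LEN rule */
{
        return attr_len == sizeof(struct tc_prio_qopt_sketch);
}

int main(void)
{
        size_t truncated = 4;   /* userspace sent only the "bands" field */

        printf("max-len rule accepts short attr:   %d\n",
               check_max_len(truncated));       /* 1: out-of-bounds read risk */
        printf("exact-len rule accepts short attr: %d\n",
               check_exact_len(truncated));     /* 0: rejected at parse time */
        return 0;
}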
+MODULE_DESCRIPTION("Time Aware Priority qdisc"); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 17d2d00ddb18..dd6b1a723bf7 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -621,3 +621,4 @@ static void __exit tbf_module_exit(void) module_init(tbf_module_init) module_exit(tbf_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Token Bucket Filter qdisc"); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 7721239c185f..59304611dc00 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -523,3 +523,4 @@ module_init(teql_init); module_exit(teql_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("True (or trivial) link equalizer qdisc"); diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index abd2667734d4..da97f946b79b 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -275,7 +275,7 @@ static int __smc_release(struct smc_sock *smc) if (!smc->use_fallback) { rc = smc_close_active(smc); - sock_set_flag(sk, SOCK_DEAD); + smc_sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; } else { if (sk->sk_state != SMC_CLOSED) { @@ -1743,7 +1743,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) if (new_clcsock) sock_release(new_clcsock); new_sk->sk_state = SMC_CLOSED; - sock_set_flag(new_sk, SOCK_DEAD); + smc_sock_set_flag(new_sk, SOCK_DEAD); sock_put(new_sk); /* final */ *new_smc = NULL; goto out; diff --git a/net/smc/smc.h b/net/smc/smc.h index 24745fde4ac2..e377980b8414 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -377,4 +377,9 @@ int smc_nl_dump_hs_limitation(struct sk_buff *skb, struct netlink_callback *cb); int smc_nl_enable_hs_limitation(struct sk_buff *skb, struct genl_info *info); int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct genl_info *info); +static inline void smc_sock_set_flag(struct sock *sk, enum sock_flags flag) +{ + set_bit(flag, &sk->sk_flags); +} + #endif /* __SMC_H */ diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 89105e95b452..3c06625ceb20 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -28,13 +28,15 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, { struct smc_cdc_tx_pend *cdcpend = (struct smc_cdc_tx_pend *)pnd_snd; struct smc_connection *conn = cdcpend->conn; + struct smc_buf_desc *sndbuf_desc; struct smc_sock *smc; int diff; + sndbuf_desc = conn->sndbuf_desc; smc = container_of(conn, struct smc_sock, conn); bh_lock_sock(&smc->sk); - if (!wc_status) { - diff = smc_curs_diff(cdcpend->conn->sndbuf_desc->len, + if (!wc_status && sndbuf_desc) { + diff = smc_curs_diff(sndbuf_desc->len, &cdcpend->conn->tx_curs_fin, &cdcpend->cursor); /* sndbuf_space is decreased in smc_sendmsg */ @@ -114,9 +116,6 @@ int smc_cdc_msg_send(struct smc_connection *conn, union smc_host_cursor cfed; int rc; - if (unlikely(!READ_ONCE(conn->sndbuf_desc))) - return -ENOBUFS; - smc_cdc_add_pending_send(conn, pend); conn->tx_cdc_seq++; @@ -385,7 +384,7 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, smc->sk.sk_shutdown |= RCV_SHUTDOWN; if (smc->clcsock && smc->clcsock->sk) smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN; - sock_set_flag(&smc->sk, SOCK_DONE); + smc_sock_set_flag(&smc->sk, SOCK_DONE); sock_hold(&smc->sk); /* sock_put in close_work */ if (!queue_work(smc_close_wq, &conn->close_work)) sock_put(&smc->sk); diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index dbdf03e8aa5b..10219f55aad1 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -116,7 +116,8 @@ static void smc_close_cancel_work(struct smc_sock *smc) struct 
sock *sk = &smc->sk; release_sock(sk); - cancel_work_sync(&smc->conn.close_work); + if (cancel_work_sync(&smc->conn.close_work)) + sock_put(sk); cancel_delayed_work_sync(&smc->conn.tx_work); lock_sock(sk); } @@ -173,7 +174,7 @@ void smc_close_active_abort(struct smc_sock *smc) break; } - sock_set_flag(sk, SOCK_DEAD); + smc_sock_set_flag(sk, SOCK_DEAD); sk->sk_state_change(sk); if (release_clcsock) { diff --git a/net/socket.c b/net/socket.c index 0d1c4e78fc7f..3379c64217a4 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1685,20 +1685,16 @@ struct file *__sys_socket_file(int family, int type, int protocol) * Therefore, __weak is needed to ensure that the call is still * emitted, by telling the compiler that we don't know what the * function might eventually be. - * - * __diag_* below are needed to dismiss the missing prototype warning. */ -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "A fmod_ret entry point for BPF programs"); +__bpf_hook_start(); __weak noinline int update_socket_protocol(int family, int type, int protocol) { return protocol; } -__diag_pop(); +__bpf_hook_end(); int __sys_socket(int family, int type, int protocol) { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 9c210273d06b..daa9582ec861 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -111,7 +111,8 @@ static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) pipefs_sb = rpc_get_sb_net(net); if (pipefs_sb) { - __rpc_clnt_remove_pipedir(clnt); + if (pipefs_sb == clnt->pipefs_sb) + __rpc_clnt_remove_pipedir(clnt); rpc_put_sb_net(net); } } @@ -151,6 +152,8 @@ rpc_setup_pipedir(struct super_block *pipefs_sb, struct rpc_clnt *clnt) { struct dentry *dentry; + clnt->pipefs_sb = pipefs_sb; + if (clnt->cl_program->pipe_dir_name != NULL) { dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt); if (IS_ERR(dentry)) @@ -2171,6 +2174,7 @@ call_connect_status(struct rpc_task *task) task->tk_status = 0; switch (status) { case -ECONNREFUSED: + case -ECONNRESET: /* A positive refusal suggests a rebind is needed. 
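The smc_close_cancel_work() change above closes a reference leak: smc_cdc_msg_recv_action() takes a sock_hold() before queueing close_work, and the work function drops that reference when it runs, so when cancel_work_sync() reports that it cancelled a still-pending work, the reference now belongs to nobody and the canceller must put it. A minimal userspace sketch of the ownership rule, with every name invented for illustration:

#include <stdbool.h>
#include <stdio.h>

static int refcnt = 1;          /* the socket's base reference */
static bool work_queued;

static void sock_hold(void) { refcnt++; }
static void sock_put(void)  { refcnt--; }

static void queue_close_work(void)
{
        sock_hold();            /* reference owned by the pending work */
        work_queued = true;
}

static bool cancel_work_sync_sketch(void)
{
        bool was_queued = work_queued;

        work_queued = false;    /* the work will never run now */
        return was_queued;      /* true: its reference was orphaned */
}

int main(void)
{
        queue_close_work();
        if (cancel_work_sync_sketch())  /* mirrors the added check */
                sock_put();             /* drop the orphaned reference */
        printf("refcnt after cancel: %d\n", refcnt);    /* 1, no leak */
        return 0;
}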
*/ if (RPC_IS_SOFTCONN(task)) break; @@ -2179,7 +2183,6 @@ call_connect_status(struct rpc_task *task) goto out_retry; } fallthrough; - case -ECONNRESET: case -ECONNABORTED: case -ENETDOWN: case -ENETUNREACH: @@ -2220,7 +2223,7 @@ call_connect_status(struct rpc_task *task) } xprt_switch_put(xps); if (!task->tk_xprt) - return; + goto out; } goto out_retry; case -ENOBUFS: @@ -2235,6 +2238,7 @@ out_next: out_retry: /* Check for timeouts before looping back to call_bind */ task->tk_action = call_bind; +out: rpc_check_timeout(task); } diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 5988a5c5ff3f..102c3818bc54 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -769,6 +769,10 @@ void rpcb_getport_async(struct rpc_task *task) child = rpcb_call_async(rpcb_clnt, map, proc); rpc_release_client(rpcb_clnt); + if (IS_ERR(child)) { + /* rpcb_map_release() has freed the arguments */ + return; + } xprt->stat.bind_count++; rpc_put_task(child); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index ab453ede54f0..2364c485540c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -283,7 +283,7 @@ out_unlock: xprt_clear_locked(xprt); out_sleep: task->tk_status = -EAGAIN; - if (RPC_IS_SOFT(task)) + if (RPC_IS_SOFT(task) || RPC_IS_SOFTCONN(task)) rpc_sleep_on_timeout(&xprt->sending, task, NULL, xprt_request_timeout(req)); else @@ -349,7 +349,7 @@ out_unlock: xprt_clear_locked(xprt); out_sleep: task->tk_status = -EAGAIN; - if (RPC_IS_SOFT(task)) + if (RPC_IS_SOFT(task) || RPC_IS_SOFTCONN(task)) rpc_sleep_on_timeout(&xprt->sending, task, NULL, xprt_request_timeout(req)); else diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index a15bf2ede89b..58f3dc8d0d71 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1181,6 +1181,7 @@ static void xs_sock_reset_state_flags(struct rpc_xprt *xprt) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + transport->xprt_err = 0; clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state); clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state); @@ -2772,18 +2773,13 @@ static void xs_wake_error(struct sock_xprt *transport) { int sockerr; - if (!test_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state)) - return; - mutex_lock(&transport->recv_mutex); - if (transport->sock == NULL) - goto out; if (!test_and_clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state)) - goto out; + return; sockerr = xchg(&transport->xprt_err, 0); - if (sockerr < 0) + if (sockerr < 0) { xprt_wake_pending_tasks(&transport->xprt, sockerr); -out: - mutex_unlock(&transport->recv_mutex); + xs_tcp_force_close(&transport->xprt); + } } static void xs_wake_pending(struct sock_xprt *transport) diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index e8fd257c0e68..1a9a5bdaccf4 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -88,7 +88,7 @@ const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC }, - [TIPC_NLA_LINK_NAME] = { .type = NLA_STRING, + [TIPC_NLA_LINK_NAME] = { .type = NLA_NUL_STRING, .len = TIPC_MAX_LINK_NAME }, [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 }, [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG }, @@ -125,7 +125,7 @@ const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { const struct nla_policy tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = { [TIPC_NLA_BEARER_UNSPEC] = { .type = NLA_UNSPEC }, - 
[TIPC_NLA_BEARER_NAME] = { .type = NLA_STRING, + [TIPC_NLA_BEARER_NAME] = { .type = NLA_NUL_STRING, .len = TIPC_MAX_BEARER_NAME }, [TIPC_NLA_BEARER_PROP] = { .type = NLA_NESTED }, [TIPC_NLA_BEARER_DOMAIN] = { .type = NLA_U32 } diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index e22c81435ef7..f6dc896bf44c 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -130,6 +130,8 @@ static void virtio_transport_init_hdr(struct sk_buff *skb, hdr->dst_port = cpu_to_le32(dst_port); hdr->flags = cpu_to_le32(info->flags); hdr->len = cpu_to_le32(payload_len); + hdr->buf_alloc = cpu_to_le32(0); + hdr->fwd_cnt = cpu_to_le32(0); } static void virtio_transport_copy_nonlinear_skb(const struct sk_buff *skb, @@ -1369,11 +1371,17 @@ virtio_transport_recv_connected(struct sock *sk, vsk->peer_shutdown |= RCV_SHUTDOWN; if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) vsk->peer_shutdown |= SEND_SHUTDOWN; - if (vsk->peer_shutdown == SHUTDOWN_MASK && - vsock_stream_has_data(vsk) <= 0 && - !sock_flag(sk, SOCK_DONE)) { - (void)virtio_transport_reset(vsk, NULL); - virtio_transport_do_close(vsk, true); + if (vsk->peer_shutdown == SHUTDOWN_MASK) { + if (vsock_stream_has_data(vsk) <= 0 && !sock_flag(sk, SOCK_DONE)) { + (void)virtio_transport_reset(vsk, NULL); + virtio_transport_do_close(vsk, true); + } + /* Remove this socket anyway because the remote peer sent + * the shutdown. This way a new connection will succeed + * if the remote peer uses the same source port, + * even if the old socket is still unreleased, but now disconnected. + */ + vsock_remove_sock(vsk); } if (le32_to_cpu(virtio_vsock_hdr(skb)->flags)) sk->sk_state_change(sk); diff --git a/net/xfrm/xfrm_interface_bpf.c b/net/xfrm/xfrm_interface_bpf.c index d74f3fd20f2b..7d5e920141e9 100644 --- a/net/xfrm/xfrm_interface_bpf.c +++ b/net/xfrm/xfrm_interface_bpf.c @@ -27,9 +27,7 @@ struct bpf_xfrm_info { int link; }; -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in xfrm_interface BTF"); +__bpf_kfunc_start_defs(); /* bpf_skb_get_xfrm_info - Get XFRM metadata * @@ -93,7 +91,7 @@ __bpf_kfunc int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx, const struct bp return 0; } -__diag_pop() +__bpf_kfunc_end_defs(); BTF_SET8_START(xfrm_ifc_kfunc_set) BTF_ID_FLAGS(func, bpf_skb_get_xfrm_info) diff --git a/sound/core/info.c b/sound/core/info.c index 0b2f04dcb589..e2f302e55bbb 100644 --- a/sound/core/info.c +++ b/sound/core/info.c @@ -56,7 +56,7 @@ struct snd_info_private_data { }; static int snd_info_version_init(void); -static void snd_info_disconnect(struct snd_info_entry *entry); +static void snd_info_clear_entries(struct snd_info_entry *entry); /* @@ -569,11 +569,16 @@ void snd_info_card_disconnect(struct snd_card *card) { if (!card) return; - mutex_lock(&info_mutex); + proc_remove(card->proc_root_link); - card->proc_root_link = NULL; if (card->proc_root) - snd_info_disconnect(card->proc_root); + proc_remove(card->proc_root->p); + + mutex_lock(&info_mutex); + if (card->proc_root) + snd_info_clear_entries(card->proc_root); + card->proc_root_link = NULL; + card->proc_root = NULL; mutex_unlock(&info_mutex); } @@ -745,15 +750,14 @@ struct snd_info_entry *snd_info_create_card_entry(struct snd_card *card, } EXPORT_SYMBOL(snd_info_create_card_entry); -static void snd_info_disconnect(struct snd_info_entry *entry) +static void snd_info_clear_entries(struct snd_info_entry *entry) { struct snd_info_entry *p; if 
(!entry->p) return; list_for_each_entry(p, &entry->children, list) - snd_info_disconnect(p); - proc_remove(entry->p); + snd_info_clear_entries(p); entry->p = NULL; } @@ -770,8 +774,9 @@ void snd_info_free_entry(struct snd_info_entry * entry) if (!entry) return; if (entry->p) { + proc_remove(entry->p); mutex_lock(&info_mutex); - snd_info_disconnect(entry); + snd_info_clear_entries(entry); mutex_unlock(&info_mutex); } diff --git a/sound/oss/dmasound/dmasound_paula.c b/sound/oss/dmasound/dmasound_paula.c index 23cf8284ce36..0ba8f0c4cd99 100644 --- a/sound/oss/dmasound/dmasound_paula.c +++ b/sound/oss/dmasound/dmasound_paula.c @@ -720,15 +720,14 @@ static int __init amiga_audio_probe(struct platform_device *pdev) return dmasound_init(); } -static int __exit amiga_audio_remove(struct platform_device *pdev) +static void __exit amiga_audio_remove(struct platform_device *pdev) { dmasound_deinit(); - return 0; } static struct platform_driver amiga_audio_driver = { - .remove = __exit_p(amiga_audio_remove), - .driver = { + .remove_new = __exit_p(amiga_audio_remove), + .driver = { .name = "amiga-audio", }, }; diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 3c9e6e97ad0f..03264915c618 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2068,6 +2068,7 @@ static const struct pci_device_id driver_denylist[] = { { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1043, 0x874f) }, /* ASUS ROG Zenith II / Strix */ { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb59) }, /* MSI TRX40 Creator */ { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb60) }, /* MSI TRX40 */ + { PCI_DEVICE_SUB(0x1022, 0x15e3, 0x1022, 0xd601) }, /* ASRock X670E Taichi */ {} }; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 58006c8bcfb9..669ae3d6e447 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7343,8 +7343,10 @@ enum { ALC256_FIXUP_ASUS_MIC_NO_PRESENCE, ALC299_FIXUP_PREDATOR_SPK, ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE, + ALC289_FIXUP_DELL_SPK1, ALC289_FIXUP_DELL_SPK2, ALC289_FIXUP_DUAL_SPK, + ALC289_FIXUP_RTK_AMP_DUAL_SPK, ALC294_FIXUP_SPK2_TO_DAC1, ALC294_FIXUP_ASUS_DUAL_SPK, ALC285_FIXUP_THINKPAD_X1_GEN7, @@ -7444,6 +7446,8 @@ enum { ALC287_FIXUP_THINKPAD_I2S_SPK, ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD, ALC2XX_FIXUP_HEADSET_MIC, + ALC289_FIXUP_DELL_CS35L41_SPI_2, + ALC294_FIXUP_CS35L41_I2C_2, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -8670,6 +8674,15 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC256_FIXUP_ASUS_HEADSET_MODE }, + [ALC289_FIXUP_DELL_SPK1] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x14, 0x90170140 }, + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_DELL4_MIC_NO_PRESENCE + }, [ALC289_FIXUP_DELL_SPK2] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -8685,6 +8698,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC289_FIXUP_DELL_SPK2 }, + [ALC289_FIXUP_RTK_AMP_DUAL_SPK] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_speaker2_to_dac1, + .chained = true, + .chain_id = ALC289_FIXUP_DELL_SPK1 + }, [ALC294_FIXUP_SPK2_TO_DAC1] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_speaker2_to_dac1, @@ -9552,6 +9571,16 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_mic, }, + [ALC289_FIXUP_DELL_CS35L41_SPI_2] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs35l41_fixup_spi_two, + .chained = true, + .chain_id = 
ALC289_FIXUP_DUAL_SPK + }, + [ALC294_FIXUP_CS35L41_I2C_2] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs35l41_fixup_i2c_two, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -9662,13 +9691,15 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0c1c, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1d, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1e, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS), - SND_PCI_QUIRK(0x1028, 0x0cbd, "Dell Oasis 13 CS MTL-U", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1028, 0x0cbe, "Dell Oasis 13 2-IN-1 MTL-U", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1028, 0x0cbf, "Dell Oasis 13 Low Weight MTU-L", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1028, 0x0cc1, "Dell Oasis 14 MTL-H/U", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1028, 0x0cc2, "Dell Oasis 14 2-in-1 MTL-H/U", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1028, 0x0cc3, "Dell Oasis 14 Low Weight MTL-U", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1028, 0x0cc4, "Dell Oasis 16 MTL-H/U", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cbd, "Dell Oasis 13 CS MTL-U", ALC289_FIXUP_DELL_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cbe, "Dell Oasis 13 2-IN-1 MTL-U", ALC289_FIXUP_DELL_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cbf, "Dell Oasis 13 Low Weight MTU-L", ALC289_FIXUP_DELL_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cc0, "Dell Oasis 13", ALC289_FIXUP_RTK_AMP_DUAL_SPK), + SND_PCI_QUIRK(0x1028, 0x0cc1, "Dell Oasis 14 MTL-H/U", ALC289_FIXUP_DELL_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cc2, "Dell Oasis 14 2-in-1 MTL-H/U", ALC289_FIXUP_DELL_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cc3, "Dell Oasis 14 Low Weight MTL-U", ALC289_FIXUP_DELL_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cc4, "Dell Oasis 16 MTL-H/U", ALC289_FIXUP_DELL_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cc5, "Dell Oasis 14", ALC289_FIXUP_RTK_AMP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), @@ -9913,6 +9944,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW), SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x1a83, "ASUS UM5302LA", ALC294_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B), SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC), @@ -9929,6 +9961,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402ZA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x16a3, "ASUS UX3402VA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x1f62, "ASUS UX7602ZM", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502), SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), diff --git a/sound/soc/amd/acp/acp-i2s.c b/sound/soc/amd/acp/acp-i2s.c index 1185e5aac523..60cbc881be6e 100644 --- a/sound/soc/amd/acp/acp-i2s.c 
+++ b/sound/soc/amd/acp/acp-i2s.c
@@ -26,7 +26,6 @@
 
 #define DRV_NAME "acp_i2s_playcap"
 #define	I2S_MASTER_MODE_ENABLE		1
-#define	I2S_MODE_ENABLE			0
 #define	LRCLK_DIV_FIELD			GENMASK(10, 2)
 #define	BCLK_DIV_FIELD			GENMASK(23, 11)
 #define	ACP63_LRCLK_DIV_FIELD		GENMASK(12, 2)
@@ -56,7 +55,8 @@ static inline void acp_set_i2s_clk(struct acp_dev_data *adata, int dai_id)
 	}
 
 	val = I2S_MASTER_MODE_ENABLE;
-	val |= I2S_MODE_ENABLE & BIT(1);
+	if (adata->tdm_mode)
+		val |= BIT(1);
 
 	switch (chip->acp_rev) {
 	case ACP63_DEV:
diff --git a/sound/soc/codecs/aw88395/aw88395.c b/sound/soc/codecs/aw88395/aw88395.c
index 77227c8f01f6..3c459a67ad0c 100644
--- a/sound/soc/codecs/aw88395/aw88395.c
+++ b/sound/soc/codecs/aw88395/aw88395.c
@@ -356,7 +356,7 @@ static const struct snd_kcontrol_new aw88395_controls[] = {
 		aw88395_get_fade_in_time, aw88395_set_fade_in_time),
 	SOC_SINGLE_EXT("Volume Ramp Down Step", 0, 0, FADE_TIME_MAX, FADE_TIME_MIN,
 		aw88395_get_fade_out_time, aw88395_set_fade_out_time),
-	SOC_SINGLE_EXT("Calib", 0, 0, 100, 0,
+	SOC_SINGLE_EXT("Calib", 0, 0, AW88395_CALI_RE_MAX, 0,
 		aw88395_re_get, aw88395_re_set),
 	AW88395_PROFILE_EXT("Profile Set", aw88395_profile_info,
 		aw88395_profile_get, aw88395_profile_set),
diff --git a/sound/soc/codecs/aw88399.c b/sound/soc/codecs/aw88399.c
index ce30bc7cdea9..54f8457e8497 100644
--- a/sound/soc/codecs/aw88399.c
+++ b/sound/soc/codecs/aw88399.c
@@ -438,7 +438,7 @@ static int aw_dev_set_vcalb(struct aw88399 *aw88399)
 	if (ret)
 		return ret;
 
-	vsense_select = vsense_select & (~AW88399_VDSEL_MASK);
+	vsense_select = vsense_value & (~AW88399_VDSEL_MASK);
 
 	ret = aw88399_dev_get_icalk(aw88399, &icalk);
 	if (ret) {
@@ -486,8 +486,8 @@ static int aw_dev_update_cali_re(struct aw_cali_desc *cali_desc)
 	u32 cali_re;
 	int ret;
 
-	if ((aw_dev->cali_desc.cali_re <= AW88399_CALI_RE_MAX) ||
-	    (aw_dev->cali_desc.cali_re >= AW88399_CALI_RE_MIN))
+	if ((aw_dev->cali_desc.cali_re >= AW88399_CALI_RE_MAX) ||
+	    (aw_dev->cali_desc.cali_re <= AW88399_CALI_RE_MIN))
 		return -EINVAL;
 
 	cali_re = AW88399_SHOW_RE_TO_DSP_RE((aw_dev->cali_desc.cali_re +
@@ -1710,7 +1710,7 @@ static const struct snd_kcontrol_new aw88399_controls[] = {
 		aw88399_get_fade_in_time, aw88399_set_fade_in_time),
 	SOC_SINGLE_EXT("Volume Ramp Down Step", 0, 0, FADE_TIME_MAX, FADE_TIME_MIN,
 		aw88399_get_fade_out_time, aw88399_set_fade_out_time),
-	SOC_SINGLE_EXT("Calib", 0, 0, 100, 0,
+	SOC_SINGLE_EXT("Calib", 0, 0, AW88399_CALI_RE_MAX, 0,
 		aw88399_re_get, aw88399_re_set),
 	AW88399_PROFILE_EXT("AW88399 Profile Set", aw88399_profile_info,
 		aw88399_profile_get, aw88399_profile_set),
diff --git a/sound/soc/codecs/aw88399.h b/sound/soc/codecs/aw88399.h
index 8b3f1e101985..4f391099d0f2 100644
--- a/sound/soc/codecs/aw88399.h
+++ b/sound/soc/codecs/aw88399.h
@@ -522,7 +522,7 @@ enum {
 
 enum {
 	AW88399_DEV_VDSEL_DAC = 0,
-	AW88399_DEV_VDSEL_VSENSE = 1,
+	AW88399_DEV_VDSEL_VSENSE = 32,
 };
 
 enum {
diff --git a/sound/soc/codecs/da7219-aad.c b/sound/soc/codecs/da7219-aad.c
index 3bbe85091649..4c4405942779 100644
--- a/sound/soc/codecs/da7219-aad.c
+++ b/sound/soc/codecs/da7219-aad.c
@@ -927,10 +927,15 @@ void da7219_aad_suspend(struct snd_soc_component *component)
 	struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component);
 	u8 micbias_ctrl;
 
+	disable_irq(da7219_aad->irq);
+
 	if (da7219_aad->jack) {
 		/* Disable jack detection during suspend */
 		snd_soc_component_update_bits(component, DA7219_ACCDET_CONFIG_1,
 					      DA7219_ACCDET_EN_MASK, 0);
+		cancel_delayed_work_sync(&da7219_aad->jack_det_work);
+		/* Disable ground switch */
+		snd_soc_component_update_bits(component, 0xFB, 0x01, 0x00);
 
 		/*
 		 * If we have a 4-pole jack inserted, then micbias will be
@@ -947,8 +952,6 @@ void da7219_aad_suspend(struct snd_soc_component *component)
 			}
 		}
 	}
-
-	synchronize_irq(da7219_aad->irq);
 }
 
 void da7219_aad_resume(struct snd_soc_component *component)
@@ -971,6 +974,8 @@ void da7219_aad_resume(struct snd_soc_component *component)
 					      DA7219_ACCDET_EN_MASK,
 					      DA7219_ACCDET_EN_MASK);
 	}
+
+	enable_irq(da7219_aad->irq);
 }
diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c
index 09eef6042aad..20da1eaa4f1c 100644
--- a/sound/soc/codecs/hdmi-codec.c
+++ b/sound/soc/codecs/hdmi-codec.c
@@ -877,18 +877,13 @@ static int hdmi_codec_set_jack(struct snd_soc_component *component,
 			       void *data)
 {
 	struct hdmi_codec_priv *hcp = snd_soc_component_get_drvdata(component);
-	int ret = -ENOTSUPP;
 
 	if (hcp->hcd.ops->hook_plugged_cb) {
 		hcp->jack = jack;
-		ret = hcp->hcd.ops->hook_plugged_cb(component->dev->parent,
-						    hcp->hcd.data,
-						    plugged_cb,
-						    component->dev);
-		if (ret)
-			hcp->jack = NULL;
+		return 0;
 	}
-	return ret;
+
+	return -ENOTSUPP;
 }
 
 static int hdmi_dai_spdif_probe(struct snd_soc_dai *dai)
@@ -982,6 +977,21 @@ static int hdmi_of_xlate_dai_id(struct snd_soc_component *component,
 	return ret;
 }
 
+static int hdmi_probe(struct snd_soc_component *component)
+{
+	struct hdmi_codec_priv *hcp = snd_soc_component_get_drvdata(component);
+	int ret = 0;
+
+	if (hcp->hcd.ops->hook_plugged_cb) {
+		ret = hcp->hcd.ops->hook_plugged_cb(component->dev->parent,
+						    hcp->hcd.data,
+						    plugged_cb,
+						    component->dev);
+	}
+
+	return ret;
+}
+
 static void hdmi_remove(struct snd_soc_component *component)
 {
 	struct hdmi_codec_priv *hcp = snd_soc_component_get_drvdata(component);
@@ -992,6 +1002,7 @@ static void hdmi_remove(struct snd_soc_component *component)
 }
 
 static const struct snd_soc_component_driver hdmi_driver = {
+	.probe			= hdmi_probe,
 	.remove			= hdmi_remove,
 	.dapm_widgets		= hdmi_widgets,
 	.num_dapm_widgets	= ARRAY_SIZE(hdmi_widgets),
diff --git a/sound/soc/codecs/nau8540.c b/sound/soc/codecs/nau8540.c
index 5cf28d034f09..f66417a0f29f 100644
--- a/sound/soc/codecs/nau8540.c
+++ b/sound/soc/codecs/nau8540.c
@@ -530,12 +530,61 @@ static int nau8540_set_tdm_slot(struct snd_soc_dai *dai,
 	return 0;
 }
 
+static int nau8540_dai_trigger(struct snd_pcm_substream *substream,
+			       int cmd, struct snd_soc_dai *dai)
+{
+	struct snd_soc_component *component = dai->component;
+	struct nau8540 *nau8540 = snd_soc_component_get_drvdata(component);
+	struct regmap *regmap = nau8540->regmap;
+	unsigned int val;
+	int ret = 0;
+
+	/* Reading the peak data to detect abnormal data in the ADC channel.
+	 * If abnormal data happens, the driver takes recovery actions to
+	 * refresh the ADC channel.
+	 */
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+		regmap_update_bits(regmap, NAU8540_REG_CLOCK_CTRL,
+				   NAU8540_CLK_AGC_EN, NAU8540_CLK_AGC_EN);
+		regmap_update_bits(regmap, NAU8540_REG_ALC_CONTROL_3,
+				   NAU8540_ALC_CH_ALL_EN, NAU8540_ALC_CH_ALL_EN);
+
+		regmap_read(regmap, NAU8540_REG_PEAK_CH1, &val);
+		dev_dbg(nau8540->dev, "1.ADC CH1 peak data %x", val);
+		if (!val) {
+			regmap_update_bits(regmap, NAU8540_REG_MUTE,
+					   NAU8540_PGA_CH_ALL_MUTE, NAU8540_PGA_CH_ALL_MUTE);
+			regmap_update_bits(regmap, NAU8540_REG_MUTE,
+					   NAU8540_PGA_CH_ALL_MUTE, 0);
+			regmap_write(regmap, NAU8540_REG_RST, 0x1);
+			regmap_write(regmap, NAU8540_REG_RST, 0);
+			regmap_read(regmap, NAU8540_REG_PEAK_CH1, &val);
+			dev_dbg(nau8540->dev, "2.ADC CH1 peak data %x", val);
+			if (!val) {
+				dev_err(nau8540->dev, "Channel recovery failed!!");
+				ret = -EIO;
+			}
+		}
+		regmap_update_bits(regmap, NAU8540_REG_CLOCK_CTRL,
+				   NAU8540_CLK_AGC_EN, 0);
+		regmap_update_bits(regmap, NAU8540_REG_ALC_CONTROL_3,
+				   NAU8540_ALC_CH_ALL_EN, 0);
+		break;
+
+	default:
+		break;
+	}
+
+	return ret;
+}
+
 static const struct snd_soc_dai_ops nau8540_dai_ops = {
 	.startup = nau8540_dai_startup,
 	.hw_params = nau8540_hw_params,
 	.set_fmt = nau8540_set_fmt,
 	.set_tdm_slot = nau8540_set_tdm_slot,
+	.trigger = nau8540_dai_trigger,
 };
 
 #define NAU8540_RATES SNDRV_PCM_RATE_8000_48000
diff --git a/sound/soc/codecs/nau8540.h b/sound/soc/codecs/nau8540.h
index 305ea9207cf0..2ce6063d462b 100644
--- a/sound/soc/codecs/nau8540.h
+++ b/sound/soc/codecs/nau8540.h
@@ -85,6 +85,7 @@
 
 /* CLOCK_CTRL (0x02) */
 #define NAU8540_CLK_ADC_EN		(0x1 << 15)
+#define NAU8540_CLK_AGC_EN		(0x1 << 3)
 #define NAU8540_CLK_I2S_EN		(0x1 << 1)
 
 /* CLOCK_SRC (0x03) */
@@ -168,6 +169,13 @@
 #define NAU8540_TDM_OFFSET_EN		(0x1 << 14)
 #define NAU8540_TDM_TX_MASK		0xf
 
+/* ALC_CONTROL_3 (0x22) */
+#define NAU8540_ALC_CH1_EN		(0x1 << 12)
+#define NAU8540_ALC_CH2_EN		(0x1 << 13)
+#define NAU8540_ALC_CH3_EN		(0x1 << 14)
+#define NAU8540_ALC_CH4_EN		(0x1 << 15)
+#define NAU8540_ALC_CH_ALL_EN		(0xf << 12)
+
 /* ADC_SAMPLE_RATE (0x3A) */
 #define NAU8540_CH_SYNC			(0x1 << 14)
 #define NAU8540_ADC_OSR_MASK		0x3
@@ -181,6 +189,13 @@
 #define NAU8540_VMID_SEL_SFT		4
 #define NAU8540_VMID_SEL_MASK		(0x3 << NAU8540_VMID_SEL_SFT)
 
+/* MUTE (0x61) */
+#define NAU8540_PGA_CH1_MUTE		0x1
+#define NAU8540_PGA_CH2_MUTE		0x2
+#define NAU8540_PGA_CH3_MUTE		0x4
+#define NAU8540_PGA_CH4_MUTE		0x8
+#define NAU8540_PGA_CH_ALL_MUTE		0xf
+
 /* MIC_BIAS (0x67) */
 #define NAU8540_PU_PRE			(0x1 << 8)
diff --git a/sound/soc/codecs/rt712-sdca.c b/sound/soc/codecs/rt712-sdca.c
index 7077ff6ba1f4..6954fbe7ec5f 100644
--- a/sound/soc/codecs/rt712-sdca.c
+++ b/sound/soc/codecs/rt712-sdca.c
@@ -963,13 +963,6 @@ static int rt712_sdca_probe(struct snd_soc_component *component)
 	rt712_sdca_parse_dt(rt712, &rt712->slave->dev);
 	rt712->component = component;
 
-	if (!rt712->first_hw_init)
-		return 0;
-
-	ret = pm_runtime_resume(component->dev);
-	if (ret < 0 && ret != -EACCES)
-		return ret;
-
 	/* add SPK route */
 	if (rt712->hw_id != RT712_DEV_ID_713) {
 		snd_soc_add_component_controls(component,
@@ -980,6 +973,13 @@ static int rt712_sdca_probe(struct snd_soc_component *component)
 			rt712_sdca_spk_dapm_routes, ARRAY_SIZE(rt712_sdca_spk_dapm_routes));
 	}
 
+	if (!rt712->first_hw_init)
+		return 0;
+
+	ret = pm_runtime_resume(component->dev);
+	if (ret < 0 && ret != -EACCES)
+		return ret;
+
 	return 0;
 }
diff --git a/sound/soc/mediatek/mt8186/mt8186-mt6366-rt1019-rt5682s.c b/sound/soc/mediatek/mt8186/mt8186-mt6366-rt1019-rt5682s.c
index 6dfcfcf47cab..f78197c8e582 100644
--- a/sound/soc/mediatek/mt8186/mt8186-mt6366-rt1019-rt5682s.c
+++ b/sound/soc/mediatek/mt8186/mt8186-mt6366-rt1019-rt5682s.c
@@ -1216,7 +1216,7 @@ static int mt8186_mt6366_rt1019_rt5682s_dev_probe(struct platform_device *pdev)
 	playback_codec = of_get_child_by_name(pdev->dev.of_node, "playback-codecs");
 	if (!playback_codec) {
 		ret = -EINVAL;
-		dev_err_probe(&pdev->dev, ret, "Property 'speaker-codecs' missing or invalid\n");
+		dev_err_probe(&pdev->dev, ret, "Property 'playback-codecs' missing or invalid\n");
 		goto err_playback_codec;
 	}
 
@@ -1230,7 +1230,7 @@ static int mt8186_mt6366_rt1019_rt5682s_dev_probe(struct platform_device *pdev)
 	for_each_card_prelinks(card, i, dai_link) {
 		ret = mt8186_mt6366_card_set_be_link(card, dai_link, playback_codec, "I2S3");
 		if (ret) {
-			dev_err_probe(&pdev->dev, ret, "%s set speaker_codec fail\n",
+			dev_err_probe(&pdev->dev, ret, "%s set playback_codec fail\n",
 				      dai_link->name);
 			goto err_probe;
 		}
diff --git a/sound/soc/rockchip/rockchip_i2s_tdm.c b/sound/soc/rockchip/rockchip_i2s_tdm.c
index 5c51dbef6e86..860e66ec85e8 100644
--- a/sound/soc/rockchip/rockchip_i2s_tdm.c
+++ b/sound/soc/rockchip/rockchip_i2s_tdm.c
@@ -1757,7 +1757,7 @@ static struct platform_driver rockchip_i2s_tdm_driver = {
 	.remove_new = rockchip_i2s_tdm_remove,
 	.driver = {
 		.name = DRV_NAME,
-		.of_match_table = of_match_ptr(rockchip_i2s_tdm_match),
+		.of_match_table = rockchip_i2s_tdm_match,
 		.pm = &rockchip_i2s_tdm_pm_ops,
 	},
 };
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 4e2beda6f9bf..3844f777c87b 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -3670,7 +3670,7 @@ snd_soc_dapm_new_control_unlocked(struct snd_soc_dapm_context *dapm,
 		dapm_pinctrl_event(w, NULL, SND_SOC_DAPM_POST_PMD);
 		break;
 	case snd_soc_dapm_clock_supply:
-		w->clk = devm_clk_get(dapm->dev, w->name);
+		w->clk = devm_clk_get(dapm->dev, widget->name);
 		if (IS_ERR(w->clk)) {
 			ret = PTR_ERR(w->clk);
 			goto request_failed;
diff --git a/sound/soc/sof/sof-client.c b/sound/soc/sof/sof-client.c
index 9dce7f53b482..54dca91255a0 100644
--- a/sound/soc/sof/sof-client.c
+++ b/sound/soc/sof/sof-client.c
@@ -176,7 +176,7 @@ int sof_register_clients(struct snd_sof_dev *sdev)
 		goto err_kernel_injector;
 	}
 
-	/* Platform depndent client device registration */
+	/* Platform dependent client device registration */
 	if (sof_ops(sdev) && sof_ops(sdev)->register_ipc_clients)
 		ret = sof_ops(sdev)->register_ipc_clients(sdev);
diff --git a/sound/soc/ti/omap-mcbsp.c b/sound/soc/ti/omap-mcbsp.c
index 7643a54592f5..2110ffe5281c 100644
--- a/sound/soc/ti/omap-mcbsp.c
+++ b/sound/soc/ti/omap-mcbsp.c
@@ -73,14 +73,16 @@ static int omap2_mcbsp_set_clks_src(struct omap_mcbsp *mcbsp, u8 fck_src_id)
 		return 0;
 	}
 
-	pm_runtime_put_sync(mcbsp->dev);
+	if (mcbsp->active)
+		pm_runtime_put_sync(mcbsp->dev);
 
 	r = clk_set_parent(mcbsp->fclk, fck_src);
 	if (r)
 		dev_err(mcbsp->dev, "CLKS: could not clk_set_parent() to %s\n",
 			src);
 
-	pm_runtime_get_sync(mcbsp->dev);
+	if (mcbsp->active)
+		pm_runtime_get_sync(mcbsp->dev);
 
 	clk_put(fck_src);
diff --git a/tools/net/ynl/generated/devlink-user.c b/tools/net/ynl/generated/devlink-user.c
index 75b744b47986..bc5065bd99b2 100644
--- a/tools/net/ynl/generated/devlink-user.c
+++ b/tools/net/ynl/generated/devlink-user.c
@@ -121,6 +121,8 @@ const char *devlink_port_fn_opstate_str(enum devlink_port_fn_opstate value)
 static const char * const devlink_port_fn_attr_cap_strmap[] = {
 	[0] = "roce-bit",
 	[1] = "migratable-bit",
+	[2] = "ipsec-crypto-bit",
+	[3] = "ipsec-packet-bit",
 };
 
 const char *devlink_port_fn_attr_cap_str(enum devlink_port_fn_attr_cap value)
diff --git a/tools/net/ynl/generated/nfsd-user.c b/tools/net/ynl/generated/nfsd-user.c
index fec6828680ce..360b6448c6e9 100644
--- a/tools/net/ynl/generated/nfsd-user.c
+++ b/tools/net/ynl/generated/nfsd-user.c
@@ -50,9 +50,116 @@ struct ynl_policy_nest nfsd_rpc_status_nest = {
 /* Common nested types */
 /* ============== NFSD_CMD_RPC_STATUS_GET ============== */
 /* NFSD_CMD_RPC_STATUS_GET - dump */
-void nfsd_rpc_status_get_list_free(struct nfsd_rpc_status_get_list *rsp)
+int nfsd_rpc_status_get_rsp_dump_parse(const struct nlmsghdr *nlh, void *data)
 {
-	struct nfsd_rpc_status_get_list *next = rsp;
+	struct nfsd_rpc_status_get_rsp_dump *dst;
+	struct ynl_parse_arg *yarg = data;
+	unsigned int n_compound_ops = 0;
+	const struct nlattr *attr;
+	int i;
+
+	dst = yarg->data;
+
+	if (dst->compound_ops)
+		return ynl_error_parse(yarg, "attribute already present (rpc-status.compound-ops)");
+
+	mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+		unsigned int type = mnl_attr_get_type(attr);
+
+		if (type == NFSD_A_RPC_STATUS_XID) {
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+			dst->_present.xid = 1;
+			dst->xid = mnl_attr_get_u32(attr);
+		} else if (type == NFSD_A_RPC_STATUS_FLAGS) {
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+			dst->_present.flags = 1;
+			dst->flags = mnl_attr_get_u32(attr);
+		} else if (type == NFSD_A_RPC_STATUS_PROG) {
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+			dst->_present.prog = 1;
+			dst->prog = mnl_attr_get_u32(attr);
+		} else if (type == NFSD_A_RPC_STATUS_VERSION) {
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+			dst->_present.version = 1;
+			dst->version = mnl_attr_get_u8(attr);
+		} else if (type == NFSD_A_RPC_STATUS_PROC) {
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+			dst->_present.proc = 1;
+			dst->proc = mnl_attr_get_u32(attr);
+		} else if (type == NFSD_A_RPC_STATUS_SERVICE_TIME) {
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+			dst->_present.service_time = 1;
+			dst->service_time = mnl_attr_get_u64(attr);
+		} else if (type == NFSD_A_RPC_STATUS_SADDR4) {
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+			dst->_present.saddr4 = 1;
+			dst->saddr4 = mnl_attr_get_u32(attr);
+		} else if (type == NFSD_A_RPC_STATUS_DADDR4) {
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+			dst->_present.daddr4 = 1;
+			dst->daddr4 = mnl_attr_get_u32(attr);
+		} else if (type == NFSD_A_RPC_STATUS_SADDR6) {
+			unsigned int len;
+
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+
+			len = mnl_attr_get_payload_len(attr);
+			dst->_present.saddr6_len = len;
+			dst->saddr6 = malloc(len);
+			memcpy(dst->saddr6, mnl_attr_get_payload(attr), len);
+		} else if (type == NFSD_A_RPC_STATUS_DADDR6) {
+			unsigned int len;
+
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+
+			len = mnl_attr_get_payload_len(attr);
+			dst->_present.daddr6_len = len;
+			dst->daddr6 = malloc(len);
+			memcpy(dst->daddr6, mnl_attr_get_payload(attr), len);
+		} else if (type == NFSD_A_RPC_STATUS_SPORT) {
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+			dst->_present.sport = 1;
+			dst->sport = mnl_attr_get_u16(attr);
+		} else if (type == NFSD_A_RPC_STATUS_DPORT) {
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+			dst->_present.dport = 1;
+			dst->dport = mnl_attr_get_u16(attr);
+		} else if (type == NFSD_A_RPC_STATUS_COMPOUND_OPS) {
+			n_compound_ops++;
+		}
+	}
+
+	if (n_compound_ops) {
+		dst->compound_ops = calloc(n_compound_ops, sizeof(*dst->compound_ops));
+		dst->n_compound_ops = n_compound_ops;
+		i = 0;
+		mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+			if (mnl_attr_get_type(attr) == NFSD_A_RPC_STATUS_COMPOUND_OPS) {
+				dst->compound_ops[i] = mnl_attr_get_u32(attr);
+				i++;
+			}
+		}
+	}
+
+	return MNL_CB_OK;
+}
+
+void
+nfsd_rpc_status_get_rsp_list_free(struct nfsd_rpc_status_get_rsp_list *rsp)
+{
+	struct nfsd_rpc_status_get_rsp_list *next = rsp;
 
 	while ((void *)next != YNL_LIST_END) {
 		rsp = next;
@@ -65,15 +172,16 @@ void nfsd_rpc_status_get_list_free(struct nfsd_rpc_status_get_list *rsp)
 	}
 }
 
-struct nfsd_rpc_status_get_list *nfsd_rpc_status_get_dump(struct ynl_sock *ys)
+struct nfsd_rpc_status_get_rsp_list *
+nfsd_rpc_status_get_dump(struct ynl_sock *ys)
 {
 	struct ynl_dump_state yds = {};
 	struct nlmsghdr *nlh;
 	int err;
 
 	yds.ys = ys;
-	yds.alloc_sz = sizeof(struct nfsd_rpc_status_get_list);
-	yds.cb = nfsd_rpc_status_get_rsp_parse;
+	yds.alloc_sz = sizeof(struct nfsd_rpc_status_get_rsp_list);
+	yds.cb = nfsd_rpc_status_get_rsp_dump_parse;
 	yds.rsp_cmd = NFSD_CMD_RPC_STATUS_GET;
 	yds.rsp_policy = &nfsd_rpc_status_nest;
 
@@ -86,7 +194,7 @@ struct nfsd_rpc_status_get_list *nfsd_rpc_status_get_dump(struct ynl_sock *ys)
 	return yds.first;
 
 free_list:
-	nfsd_rpc_status_get_list_free(yds.first);
+	nfsd_rpc_status_get_rsp_list_free(yds.first);
 	return NULL;
 }
diff --git a/tools/net/ynl/generated/nfsd-user.h b/tools/net/ynl/generated/nfsd-user.h
index b6b69501031a..989c6e209ced 100644
--- a/tools/net/ynl/generated/nfsd-user.h
+++ b/tools/net/ynl/generated/nfsd-user.h
@@ -21,13 +21,47 @@ const char *nfsd_op_str(int op);
 /* Common nested types */
 /* ============== NFSD_CMD_RPC_STATUS_GET ============== */
 /* NFSD_CMD_RPC_STATUS_GET - dump */
-struct nfsd_rpc_status_get_list {
-	struct nfsd_rpc_status_get_list *next;
-	struct nfsd_rpc_status_get_rsp obj __attribute__ ((aligned (8)));
+struct nfsd_rpc_status_get_rsp_dump {
+	struct {
+		__u32 xid:1;
+		__u32 flags:1;
+		__u32 prog:1;
+		__u32 version:1;
+		__u32 proc:1;
+		__u32 service_time:1;
+		__u32 saddr4:1;
+		__u32 daddr4:1;
+		__u32 saddr6_len;
+		__u32 daddr6_len;
+		__u32 sport:1;
+		__u32 dport:1;
+	} _present;
+
+	__u32 xid /* big-endian */;
+	__u32 flags;
+	__u32 prog;
+	__u8 version;
+	__u32 proc;
+	__s64 service_time;
+	__u32 saddr4 /* big-endian */;
+	__u32 daddr4 /* big-endian */;
+	void *saddr6;
+	void *daddr6;
+	__u16 sport /* big-endian */;
+	__u16 dport /* big-endian */;
+	unsigned int n_compound_ops;
+	__u32 *compound_ops;
+};
+
+struct nfsd_rpc_status_get_rsp_list {
+	struct nfsd_rpc_status_get_rsp_list *next;
+	struct nfsd_rpc_status_get_rsp_dump obj __attribute__((aligned(8)));
 };
 
-void nfsd_rpc_status_get_list_free(struct nfsd_rpc_status_get_list *rsp);
+void
+nfsd_rpc_status_get_rsp_list_free(struct nfsd_rpc_status_get_rsp_list *rsp);
 
-struct nfsd_rpc_status_get_list *nfsd_rpc_status_get_dump(struct ynl_sock *ys);
+struct nfsd_rpc_status_get_rsp_list *
+nfsd_rpc_status_get_dump(struct ynl_sock *ys);
 
 #endif /* _LINUX_NFSD_GEN_H */
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
index 13427436bfb7..c4003a83cd5d 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/ynl-gen-c.py
@@ -3,6 +3,7 @@
 
 import argparse
 import collections
+import filecmp
 import os
 import re
 import shutil
@@ -1168,7 +1169,7 @@ class CodeWriter:
         if out_file is None:
             self._out = os.sys.stdout
         else:
-            self._out = tempfile.TemporaryFile('w+')
+            self._out = tempfile.NamedTemporaryFile('w+')
             self._out_file = out_file
 
     def __del__(self):
@@ -1177,6 +1178,10 @@ class CodeWriter:
     def close_out_file(self):
         if self._out == os.sys.stdout:
             return
+        # Avoid modifying the file if contents didn't change
+        self._out.flush()
+        if os.path.isfile(self._out_file) and filecmp.cmp(self._out.name, self._out_file, shallow=False):
+            return
         with open(self._out_file, 'w+') as out_file:
             self._out.seek(0)
             shutil.copyfileobj(self._out, out_file)
diff --git a/tools/power/cpupower/man/cpupower-powercap-info.1 b/tools/power/cpupower/man/cpupower-powercap-info.1
index df3087000efb..145d6f06fa72 100644
--- a/tools/power/cpupower/man/cpupower-powercap-info.1
+++ b/tools/power/cpupower/man/cpupower-powercap-info.1
@@ -17,7 +17,7 @@ settings of all cores, see cpupower(1) how to choose specific cores.
 .SH "DOCUMENTATION"
 
 kernel sources:
-Documentation/power/powercap/powercap.txt
+Documentation/power/powercap/powercap.rst
 
 .SH "SEE ALSO"
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index a5e246f7b202..91907b321f91 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -39,9 +39,7 @@ struct bpf_testmod_struct_arg_4 {
 	int b;
 };
 
-__diag_push();
-__diag_ignore_all("-Wmissing-prototypes",
-		  "Global functions as their definitions will be in bpf_testmod.ko BTF");
+__bpf_hook_start();
 
 noinline int bpf_testmod_test_struct_arg_1(struct bpf_testmod_struct_arg_2 a, int b, int c) {
@@ -335,7 +333,7 @@ noinline int bpf_fentry_shadow_test(int a)
 }
 EXPORT_SYMBOL_GPL(bpf_fentry_shadow_test);
 
-__diag_pop();
+__bpf_hook_end();
 
 static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
 	.attr = { .name = "bpf_testmod", .mode = 0666, },
diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
index 1a9eeefda9a8..8bf497a9843e 100644
--- a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
+++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
@@ -326,20 +326,14 @@ static int map_create(__u32 type, const char *name, struct bpf_map_create_opts *
 
 static int create_hash(void)
 {
-	struct bpf_map_create_opts map_opts = {
-		.sz = sizeof(map_opts),
-		.map_flags = BPF_F_NO_PREALLOC,
-	};
+	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);
 
 	return map_create(BPF_MAP_TYPE_HASH, "hash", &map_opts);
 }
 
 static int create_percpu_hash(void)
 {
-	struct bpf_map_create_opts map_opts = {
-		.sz = sizeof(map_opts),
-		.map_flags = BPF_F_NO_PREALLOC,
-	};
+	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);
 
 	return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash", &map_opts);
 }
@@ -356,21 +350,17 @@ static int create_percpu_hash_prealloc(void)
 
 static int create_lru_hash(__u32 type, __u32 map_flags)
 {
-	struct bpf_map_create_opts map_opts = {
-		.sz = sizeof(map_opts),
-		.map_flags = map_flags,
-	};
+	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = map_flags);
 
 	return map_create(type, "lru_hash", &map_opts);
 }
 
 static int create_hash_of_maps(void)
 {
-	struct bpf_map_create_opts map_opts = {
-		.sz = sizeof(map_opts),
+	LIBBPF_OPTS(bpf_map_create_opts, map_opts,
 		.map_flags = BPF_F_NO_PREALLOC,
 		.inner_map_fd = create_small_hash(),
-	};
+	);
 	int ret;
 
 	ret = map_create_opts(BPF_MAP_TYPE_HASH_OF_MAPS, "hash_of_maps",
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
index e02feb5fae97..574d9a0cdc8e 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
@@ -4,6 +4,7 @@
 #include <test_progs.h>
 #include <bpf/libbpf.h>
 #include <bpf/btf.h>
+#include "iters_css_task.skel.h"
 #include "cgroup_iter.skel.h"
 #include "cgroup_helpers.h"
 
@@ -263,6 +264,35 @@ close_cgrp:
 	close(cgrp_fd);
 }
 
+static void test_walk_self_only_css_task(void)
+{
+	struct iters_css_task *skel;
+	int err;
+
+	skel = iters_css_task__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	bpf_program__set_autoload(skel->progs.cgroup_id_printer, true);
+
+	err = iters_css_task__load(skel);
+	if (!ASSERT_OK(err, "skel_load"))
+		goto cleanup;
+
+	err = join_cgroup(cg_path[CHILD2]);
+	if (!ASSERT_OK(err, "join_cgroup"))
+		goto cleanup;
+
+	skel->bss->target_pid = getpid();
+	snprintf(expected_output, sizeof(expected_output),
+		PROLOGUE "%8llu\n" EPILOGUE, cg_id[CHILD2]);
+	read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[CHILD2],
+		BPF_CGROUP_ITER_SELF_ONLY, "test_walk_self_only_css_task");
+	ASSERT_EQ(skel->bss->css_task_cnt, 1, "css_task_cnt");
+cleanup:
+	iters_css_task__destroy(skel);
+}
+
 void test_cgroup_iter(void)
 {
 	struct cgroup_iter *skel = NULL;
@@ -293,6 +323,9 @@ void test_cgroup_iter(void)
 		test_walk_self_only(skel);
 	if (test__start_subtest("cgroup_iter__dead_self_only"))
 		test_walk_dead_self_only(skel);
+	if (test__start_subtest("cgroup_iter__self_only_css_task"))
+		test_walk_self_only_css_task();
+
 out:
 	cgroup_iter__destroy(skel);
 	cleanup_cgroups();
diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c
index c2425791c923..bf84d4a1d9ae 100644
--- a/tools/testing/selftests/bpf/prog_tests/iters.c
+++ b/tools/testing/selftests/bpf/prog_tests/iters.c
@@ -294,6 +294,7 @@ void test_iters(void)
 	RUN_TESTS(iters_state_safety);
 	RUN_TESTS(iters_looping);
 	RUN_TESTS(iters);
+	RUN_TESTS(iters_css_task);
 
 	if (env.has_testmod)
 		RUN_TESTS(iters_testmod_seq);
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
index 214d9f4a94a5..ea933fd151c3 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
@@ -8,7 +8,8 @@
 #include <sys/types.h>
 #include <test_progs.h>
 
-#define TDIR "/sys/kernel/debug"
+/* TDIR must be in a location we can create a directory in. */
+#define TDIR "/tmp/test_bpffs_testdir"
 
 static int read_iter(char *file)
 {
@@ -43,8 +44,11 @@ static int fn(void)
 	if (!ASSERT_OK(err, "mount /"))
 		goto out;
 
-	err = umount(TDIR);
-	if (!ASSERT_OK(err, "umount " TDIR))
+	err = mkdir(TDIR, 0777);
+	/* If the directory already exists we can carry on. It may be left over
+	 * from a previous run.
+	 */
+	if ((err && errno != EEXIST) && !ASSERT_OK(err, "mkdir " TDIR))
 		goto out;
 
 	err = mount("none", TDIR, "tmpfs", 0, NULL);
@@ -138,6 +142,7 @@ out:
 	rmdir(TDIR "/fs1");
 	rmdir(TDIR "/fs2");
 	umount(TDIR);
+	rmdir(TDIR);
 	exit(err);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index e3e68c97b40c..e5c61aa6604a 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -46,6 +46,7 @@
 #include "verifier_movsx.skel.h"
 #include "verifier_netfilter_ctx.skel.h"
 #include "verifier_netfilter_retcode.skel.h"
+#include "verifier_precision.skel.h"
 #include "verifier_prevent_map_lookup.skel.h"
 #include "verifier_raw_stack.skel.h"
 #include "verifier_raw_tp_writable.skel.h"
@@ -153,6 +154,7 @@ void test_verifier_meta_access(void) { RUN(verifier_meta_access); }
 void test_verifier_movsx(void) { RUN(verifier_movsx); }
 void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); }
 void test_verifier_netfilter_retcode(void) { RUN(verifier_netfilter_retcode); }
+void test_verifier_precision(void) { RUN(verifier_precision); }
 void test_verifier_prevent_map_lookup(void) { RUN(verifier_prevent_map_lookup); }
 void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); }
 void test_verifier_raw_tp_writable(void) { RUN(verifier_raw_tp_writable); }
diff --git a/tools/testing/selftests/bpf/progs/iters_css_task.c b/tools/testing/selftests/bpf/progs/iters_css_task.c
index 5089ce384a1c..9ac758649cb8 100644
--- a/tools/testing/selftests/bpf/progs/iters_css_task.c
+++ b/tools/testing/selftests/bpf/progs/iters_css_task.c
@@ -10,6 +10,7 @@
 
 char _license[] SEC("license") = "GPL";
 
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
 void bpf_cgroup_release(struct cgroup *p) __ksym;
 
@@ -45,3 +46,57 @@ int BPF_PROG(iter_css_task_for_each, struct vm_area_struct *vma,
 
 	return -EPERM;
 }
+
+static inline u64 cgroup_id(struct cgroup *cgrp)
+{
+	return cgrp->kn->id;
+}
+
+SEC("?iter/cgroup")
+int cgroup_id_printer(struct bpf_iter__cgroup *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct cgroup *cgrp = ctx->cgroup;
+	struct cgroup_subsys_state *css;
+	struct task_struct *task;
+
+	/* epilogue */
+	if (cgrp == NULL) {
+		BPF_SEQ_PRINTF(seq, "epilogue\n");
+		return 0;
+	}
+
+	/* prologue */
+	if (ctx->meta->seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "prologue\n");
+
+	BPF_SEQ_PRINTF(seq, "%8llu\n", cgroup_id(cgrp));
+
+	css = &cgrp->self;
+	css_task_cnt = 0;
+	bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS) {
+		if (task->pid == target_pid)
+			css_task_cnt++;
+	}
+
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int BPF_PROG(iter_css_task_for_each_sleep)
+{
+	u64 cgrp_id = bpf_get_current_cgroup_id();
+	struct cgroup *cgrp = bpf_cgroup_from_id(cgrp_id);
+	struct cgroup_subsys_state *css;
+	struct task_struct *task;
+
+	if (cgrp == NULL)
+		return 0;
+	css = &cgrp->self;
+
+	bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS) {
+
+	}
+	bpf_cgroup_release(cgrp);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_task_failure.c b/tools/testing/selftests/bpf/progs/iters_task_failure.c
index c3bf96a67dba..6b1588d70652 100644
--- a/tools/testing/selftests/bpf/progs/iters_task_failure.c
+++ b/tools/testing/selftests/bpf/progs/iters_task_failure.c
@@ -84,8 +84,8 @@ int BPF_PROG(iter_css_lock_and_unlock)
 	return 0;
 }
 
-SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
-__failure __msg("css_task_iter is only allowed in bpf_lsm and bpf iter-s")
+SEC("?fentry/" SYS_PREFIX "sys_getpgid")
+__failure __msg("css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs")
 int BPF_PROG(iter_css_task_for_each)
 {
 	u64 cg_id = bpf_get_current_cgroup_id();
diff --git a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c
index 3ddcb3777912..2a2a942737d7 100644
--- a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c
+++ b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c
@@ -7,7 +7,8 @@
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
      (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
-     defined(__TARGET_ARCH_s390)) && __clang_major__ >= 18
+     defined(__TARGET_ARCH_s390) || defined(__TARGET_ARCH_loongarch)) && \
+    __clang_major__ >= 18
 const volatile int skip = 0;
 #else
 const volatile int skip = 1;
diff --git a/tools/testing/selftests/bpf/progs/verifier_bswap.c b/tools/testing/selftests/bpf/progs/verifier_bswap.c
index 107525fb4a6a..e61755656e8d 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bswap.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bswap.c
@@ -6,7 +6,8 @@
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
      (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
-     defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+     defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+     defined(__TARGET_ARCH_loongarch)) && \
     __clang_major__ >= 18
 
 SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c
index 9f202eda952f..d1edbcff9a18 100644
--- a/tools/testing/selftests/bpf/progs/verifier_gotol.c
+++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c
@@ -6,7 +6,8 @@
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
      (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
-     defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+     defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+     defined(__TARGET_ARCH_loongarch)) && \
     __clang_major__ >= 18
 
 SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_ldsx.c b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
index 375525329637..d4427d8e1217 100644
--- a/tools/testing/selftests/bpf/progs/verifier_ldsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
@@ -6,7 +6,8 @@
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
      (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
-     defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+     defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+     defined(__TARGET_ARCH_loongarch)) && \
     __clang_major__ >= 18
 
 SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c
index b2a04d1179d0..cbb9d6714f53 100644
--- a/tools/testing/selftests/bpf/progs/verifier_movsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c
@@ -6,7 +6,8 @@
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
      (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
-     defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+     defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+     defined(__TARGET_ARCH_loongarch)) && \
     __clang_major__ >= 18
 
 SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c
new file mode 100644
index 000000000000..193c0f8272d0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_precision.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 SUSE LLC */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0xfffffff8 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (87) r2 = -r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 8")
+__naked int bpf_neg(void)
+{
+	asm volatile (
+		"r2 = 8;"
+		"r2 = -r2;"
+		"if r2 != -8 goto 1f;"
+		"r1 = r10;"
+		"r1 += r2;"
+	"1:"
+		"r0 = 0;"
+		"exit;"
+		::: __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0x0 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (d4) r2 = le16 r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 0")
+__naked int bpf_end_to_le(void)
+{
+	asm volatile (
+		"r2 = 0;"
+		"r2 = le16 r2;"
+		"if r2 != 0 goto 1f;"
+		"r1 = r10;"
+		"r1 += r2;"
+	"1:"
+		"r0 = 0;"
+		"exit;"
+		::: __clobber_all);
+}
+
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0x0 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (dc) r2 = be16 r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 0")
+__naked int bpf_end_to_be(void)
+{
+	asm volatile (
+		"r2 = 0;"
+		"r2 = be16 r2;"
+		"if r2 != 0 goto 1f;"
+		"r1 = r10;"
+		"r1 += r2;"
+	"1:"
+		"r0 = 0;"
+		"exit;"
+		::: __clobber_all);
+}
+
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+	(defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+	defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+	__clang_major__ >= 18
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0x0 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (d7) r2 = bswap16 r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 0")
+__naked int bpf_end_bswap(void)
+{
+	asm volatile (
+		"r2 = 0;"
+		"r2 = bswap16 r2;"
+		"if r2 != 0 goto 1f;"
+		"r1 = r10;"
+		"r1 += r2;"
+	"1:"
+		"r0 = 0;"
+		"exit;"
+		::: __clobber_all);
+}
+
+#endif /* v4 instruction */
diff --git a/tools/testing/selftests/bpf/progs/verifier_sdiv.c b/tools/testing/selftests/bpf/progs/verifier_sdiv.c
index 8fc5174808b2..2a2271cf0294 100644
--- a/tools/testing/selftests/bpf/progs/verifier_sdiv.c
+++ b/tools/testing/selftests/bpf/progs/verifier_sdiv.c
@@ -6,7 +6,8 @@
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
     (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
-     defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+     defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+     defined(__TARGET_ARCH_loongarch)) && \
     __clang_major__ >= 18
 
 SEC("socket")
diff --git a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
index 3af2501082b2..b616575c3b00 100644
--- a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
+++ b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
@@ -65,3 +65,35 @@
 	.expected_attach_type = BPF_SK_LOOKUP,
 	.runs = -1,
 },
+{
+	"BPF_ST_MEM stack imm sign",
+	/* Check if verifier correctly reasons about sign of an
+	 * immediate spilled to stack by BPF_ST instruction.
+	 *
+	 *   fp[-8] = -44;
+	 *   r0 = fp[-8];
+	 *   if r0 s< 0 goto ret0;
+	 *   r0 = -1;
+	 *   exit;
+	 * ret0:
+	 *   r0 = 0;
+	 *   exit;
+	 */
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, -44),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+	BPF_JMP_IMM(BPF_JSLT, BPF_REG_0, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, -1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	/* Use prog type that requires return value in range [0, 1] */
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+	.result = VERBOSE_ACCEPT,
+	.runs = -1,
+	.errstr = "0: (7a) *(u64 *)(r10 -8) = -44 ; R10=fp0 fp-8_w=-44\
+	2: (c5) if r2 s< 0x0 goto pc+2\
+	R0_w=-44",
+},
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 17c0f92ff160..c3ba40d0b9de 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -430,7 +430,7 @@ static void print_usage(void)
 
 static void read_args(int argc, char *argv[])
 {
-	char opt;
+	int opt;
 
 	while ((opt = getopt(argc, argv, "mh")) != -1) {
 		switch (opt) {
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index f838dd370f6a..b3b2dc5a630c 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -2048,7 +2048,7 @@ run_test() {
 	case $ret in
 		0)
 			all_skipped=false
-			[ $exitcode=$ksft_skip ] && exitcode=0
+			[ $exitcode -eq $ksft_skip ] && exitcode=0
 		;;
 		$ksft_skip)
 			[ $all_skipped = true ] && exitcode=$ksft_skip
diff --git a/tools/testing/selftests/riscv/hwprobe/Makefile b/tools/testing/selftests/riscv/hwprobe/Makefile
index ebdbb3c22e54..f224b84591fb 100644
--- a/tools/testing/selftests/riscv/hwprobe/Makefile
+++ b/tools/testing/selftests/riscv/hwprobe/Makefile
@@ -2,9 +2,14 @@
 # Copyright (C) 2021 ARM Limited
 # Originally tools/testing/arm64/abi/Makefile
 
-TEST_GEN_PROGS := hwprobe
+CFLAGS += -I$(top_srcdir)/tools/include
+
+TEST_GEN_PROGS := hwprobe cbo
 
 include ../../lib.mk
 
 $(OUTPUT)/hwprobe: hwprobe.c sys_hwprobe.S
-	$(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^
+	$(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
+
+$(OUTPUT)/cbo: cbo.c sys_hwprobe.S
+	$(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
diff --git a/tools/testing/selftests/riscv/hwprobe/cbo.c b/tools/testing/selftests/riscv/hwprobe/cbo.c
new file mode 100644
index 000000000000..50a2cc8aef38
--- /dev/null
+++ b/tools/testing/selftests/riscv/hwprobe/cbo.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023 Ventana Micro Systems Inc.
+ *
+ * Run with 'taskset -c <cpu-list> cbo' to only execute hwprobe on a
+ * subset of cpus, as well as only executing the tests on those cpus.
+ */
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <sched.h>
+#include <signal.h>
+#include <assert.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <asm/ucontext.h>
+
+#include "hwprobe.h"
+#include "../../kselftest.h"
+
+#define MK_CBO(fn) cpu_to_le32((fn) << 20 | 10 << 15 | 2 << 12 | 0 << 7 | 15)
+
+static char mem[4096] __aligned(4096) = { [0 ... 4095] = 0xa5 };
+
+static bool illegal_insn;
+
+static void sigill_handler(int sig, siginfo_t *info, void *context)
+{
+	unsigned long *regs = (unsigned long *)&((ucontext_t *)context)->uc_mcontext;
+	uint32_t insn = *(uint32_t *)regs[0];
+
+	assert(insn == MK_CBO(regs[11]));
+
+	illegal_insn = true;
+	regs[0] += 4;
+}
+
+static void cbo_insn(char *base, int fn)
+{
+	uint32_t insn = MK_CBO(fn);
+
+	asm volatile(
+	"mv	a0, %0\n"
+	"li	a1, %1\n"
+	".4byte	%2\n"
+	: : "r" (base), "i" (fn), "i" (insn) : "a0", "a1", "memory");
+}
+
+static void cbo_inval(char *base) { cbo_insn(base, 0); }
+static void cbo_clean(char *base) { cbo_insn(base, 1); }
+static void cbo_flush(char *base) { cbo_insn(base, 2); }
+static void cbo_zero(char *base) { cbo_insn(base, 4); }
+
+static void test_no_zicbom(void *arg)
+{
+	ksft_print_msg("Testing Zicbom instructions remain privileged\n");
+
+	illegal_insn = false;
+	cbo_clean(&mem[0]);
+	ksft_test_result(illegal_insn, "No cbo.clean\n");
+
+	illegal_insn = false;
+	cbo_flush(&mem[0]);
+	ksft_test_result(illegal_insn, "No cbo.flush\n");
+
+	illegal_insn = false;
+	cbo_inval(&mem[0]);
+	ksft_test_result(illegal_insn, "No cbo.inval\n");
+}
+
+static void test_no_zicboz(void *arg)
+{
+	ksft_print_msg("No Zicboz, testing cbo.zero remains privileged\n");
+
+	illegal_insn = false;
+	cbo_zero(&mem[0]);
+	ksft_test_result(illegal_insn, "No cbo.zero\n");
+}
+
+static bool is_power_of_2(__u64 n)
+{
+	return n != 0 && (n & (n - 1)) == 0;
+}
+
+static void test_zicboz(void *arg)
+{
+	struct riscv_hwprobe pair = {
+		.key = RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE,
+	};
+	cpu_set_t *cpus = (cpu_set_t *)arg;
+	__u64 block_size;
+	int i, j;
+	long rc;
+
+	rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)cpus, 0);
+	block_size = pair.value;
+	ksft_test_result(rc == 0 && pair.key == RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE &&
+			 is_power_of_2(block_size), "Zicboz block size\n");
+	ksft_print_msg("Zicboz block size: %ld\n", block_size);
+
+	illegal_insn = false;
+	cbo_zero(&mem[block_size]);
+	ksft_test_result(!illegal_insn, "cbo.zero\n");
+
+	if (illegal_insn || !is_power_of_2(block_size)) {
+		ksft_test_result_skip("cbo.zero check\n");
+		return;
+	}
+
+	assert(block_size <= 1024);
+
+	for (i = 0; i < 4096 / block_size; ++i) {
+		if (i % 2)
+			cbo_zero(&mem[i * block_size]);
+	}
+
+	for (i = 0; i < 4096 / block_size; ++i) {
+		char expected = i % 2 ? 0x0 : 0xa5;
+
+		for (j = 0; j < block_size; ++j) {
+			if (mem[i * block_size + j] != expected) {
+				ksft_test_result_fail("cbo.zero check\n");
+				ksft_print_msg("cbo.zero check: mem[%d] != 0x%x\n",
+					       i * block_size + j, expected);
+				return;
+			}
+		}
+	}
+
+	ksft_test_result_pass("cbo.zero check\n");
+}
+
+static void check_no_zicboz_cpus(cpu_set_t *cpus)
+{
+	struct riscv_hwprobe pair = {
+		.key = RISCV_HWPROBE_KEY_IMA_EXT_0,
+	};
+	cpu_set_t one_cpu;
+	int i = 0, c = 0;
+	long rc;
+
+	while (i++ < CPU_COUNT(cpus)) {
+		while (!CPU_ISSET(c, cpus))
+			++c;
+
+		CPU_ZERO(&one_cpu);
+		CPU_SET(c, &one_cpu);
+
+		rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&one_cpu, 0);
+		assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0);
+
+		if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ)
+			ksft_exit_fail_msg("Zicboz is only present on a subset of harts.\n"
+					   "Use taskset to select a set of harts where Zicboz\n"
+					   "presence (present or not) is consistent for each hart\n");
+		++c;
+	}
+}
+
+enum {
+	TEST_ZICBOZ,
+	TEST_NO_ZICBOZ,
+	TEST_NO_ZICBOM,
+};
+
+static struct test_info {
+	bool enabled;
+	unsigned int nr_tests;
+	void (*test_fn)(void *arg);
+} tests[] = {
+	[TEST_ZICBOZ]		= { .nr_tests = 3, test_zicboz },
+	[TEST_NO_ZICBOZ]	= { .nr_tests = 1, test_no_zicboz },
+	[TEST_NO_ZICBOM]	= { .nr_tests = 3, test_no_zicbom },
+};
+
+int main(int argc, char **argv)
+{
+	struct sigaction act = {
+		.sa_sigaction = &sigill_handler,
+		.sa_flags = SA_SIGINFO,
+	};
+	struct riscv_hwprobe pair;
+	unsigned int plan = 0;
+	cpu_set_t cpus;
+	long rc;
+	int i;
+
+	if (argc > 1 && !strcmp(argv[1], "--sigill")) {
+		rc = sigaction(SIGILL, &act, NULL);
+		assert(rc == 0);
+		tests[TEST_NO_ZICBOZ].enabled = true;
+		tests[TEST_NO_ZICBOM].enabled = true;
+	}
+
+	rc = sched_getaffinity(0, sizeof(cpu_set_t), &cpus);
+	assert(rc == 0);
+
+	ksft_print_header();
+
+	pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
+	rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&cpus, 0);
+	if (rc < 0)
+		ksft_exit_fail_msg("hwprobe() failed with %d\n", rc);
+	assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0);
+
+	if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ) {
+		tests[TEST_ZICBOZ].enabled = true;
+		tests[TEST_NO_ZICBOZ].enabled = false;
+	} else {
+		check_no_zicboz_cpus(&cpus);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(tests); ++i)
+		plan += tests[i].enabled ? tests[i].nr_tests : 0;
+
+	if (plan == 0)
+		ksft_print_msg("No tests enabled.\n");
+	else
+		ksft_set_plan(plan);
+
+	for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+		if (tests[i].enabled)
+			tests[i].test_fn(&cpus);
+	}
+
+	ksft_finished();
+}
diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.c b/tools/testing/selftests/riscv/hwprobe/hwprobe.c
index 09f290a67420..c474891df307 100644
--- a/tools/testing/selftests/riscv/hwprobe/hwprobe.c
+++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.c
@@ -1,14 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
-#include <stddef.h>
-#include <asm/hwprobe.h>
-
-/*
- * Rather than relying on having a new enough libc to define this, just do it
- * ourselves. This way we don't need to be coupled to a new-enough libc to
- * contain the call.
- */
-long riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
-		   size_t cpu_count, unsigned long *cpus, unsigned int flags);
+#include "hwprobe.h"
+#include "../../kselftest.h"
 
 int main(int argc, char **argv)
 {
@@ -16,6 +8,9 @@ int main(int argc, char **argv)
 	unsigned long cpus;
 	long out;
 
+	ksft_print_header();
+	ksft_set_plan(5);
+
 	/* Fake the CPU_SET ops. */
 	cpus = -1;
 
@@ -25,13 +20,16 @@ int main(int argc, char **argv)
 	 */
 	for (long i = 0; i < 8; i++)
 		pairs[i].key = i;
+
 	out = riscv_hwprobe(pairs, 8, 1, &cpus, 0);
 	if (out != 0)
-		return -1;
+		ksft_exit_fail_msg("hwprobe() failed with %ld\n", out);
+
 	for (long i = 0; i < 4; ++i) {
 		/* Fail if the kernel claims not to recognize a base key. */
 		if ((i < 4) && (pairs[i].key != i))
-			return -2;
+			ksft_exit_fail_msg("Failed to recognize base key: key != i, "
+					   "key=%ld, i=%ld\n", pairs[i].key, i);
 
 		if (pairs[i].key != RISCV_HWPROBE_KEY_BASE_BEHAVIOR)
 			continue;
@@ -39,52 +37,30 @@ int main(int argc, char **argv)
 		if (pairs[i].value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA)
 			continue;
 
-		return -3;
+		ksft_exit_fail_msg("Unexpected pair: (%ld, %ld)\n", pairs[i].key, pairs[i].value);
 	}
 
-	/*
-	 * This should also work with a NULL CPU set, but should not work
-	 * with an improperly supplied CPU set.
-	 */
 	out = riscv_hwprobe(pairs, 8, 0, 0, 0);
-	if (out != 0)
-		return -4;
+	ksft_test_result(out == 0, "NULL CPU set\n");
 
 	out = riscv_hwprobe(pairs, 8, 0, &cpus, 0);
-	if (out == 0)
-		return -5;
+	ksft_test_result(out != 0, "Bad CPU set\n");
 
 	out = riscv_hwprobe(pairs, 8, 1, 0, 0);
-	if (out == 0)
-		return -6;
+	ksft_test_result(out != 0, "NULL CPU set with non-zero count\n");
 
-	/*
-	 * Check that keys work by providing one that we know exists, and
-	 * checking to make sure the resultig pair is what we asked for.
-	 */
 	pairs[0].key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR;
 	out = riscv_hwprobe(pairs, 1, 1, &cpus, 0);
-	if (out != 0)
-		return -7;
-	if (pairs[0].key != RISCV_HWPROBE_KEY_BASE_BEHAVIOR)
-		return -8;
+	ksft_test_result(out == 0 && pairs[0].key == RISCV_HWPROBE_KEY_BASE_BEHAVIOR,
+			 "Existing key is maintained\n");
 
-	/*
-	 * Check that an unknown key gets overwritten with -1,
-	 * but doesn't block elements after it.
-	 */
 	pairs[0].key = 0x5555;
 	pairs[1].key = 1;
 	pairs[1].value = 0xAAAA;
 	out = riscv_hwprobe(pairs, 2, 0, 0, 0);
-	if (out != 0)
-		return -9;
-
-	if (pairs[0].key != -1)
-		return -10;
-
-	if ((pairs[1].key != 1) || (pairs[1].value == 0xAAAA))
-		return -11;
+	ksft_test_result(out == 0 && pairs[0].key == -1 &&
+			 pairs[1].key == 1 && pairs[1].value != 0xAAAA,
+			 "Unknown key overwritten with -1 and doesn't block other elements\n");
 
-	return 0;
+	ksft_finished();
 }
diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.h b/tools/testing/selftests/riscv/hwprobe/hwprobe.h
new file mode 100644
index 000000000000..721b0ce73a56
--- /dev/null
+++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_RISCV_HWPROBE_H
+#define SELFTEST_RISCV_HWPROBE_H
+#include <stddef.h>
+#include <asm/hwprobe.h>
+
+/*
+ * Rather than relying on having a new enough libc to define this, just do it
+ * ourselves. This way we don't need to be coupled to a new-enough libc to
+ * contain the call.
+ */
+long riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
+		   size_t cpu_count, unsigned long *cpus, unsigned int flags);
+
+#endif
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index 92336721321a..ae2b33c21c45 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -85,6 +85,48 @@ void vsock_wait_remote_close(int fd)
 	close(epollfd);
 }
 
+/* Bind to <bind_port>, connect to <cid, port> and return the file descriptor. */
+int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type)
+{
+	struct sockaddr_vm sa_client = {
+		.svm_family = AF_VSOCK,
+		.svm_cid = VMADDR_CID_ANY,
+		.svm_port = bind_port,
+	};
+	struct sockaddr_vm sa_server = {
+		.svm_family = AF_VSOCK,
+		.svm_cid = cid,
+		.svm_port = port,
+	};
+
+	int client_fd, ret;
+
+	client_fd = socket(AF_VSOCK, type, 0);
+	if (client_fd < 0) {
+		perror("socket");
+		exit(EXIT_FAILURE);
+	}
+
+	if (bind(client_fd, (struct sockaddr *)&sa_client, sizeof(sa_client))) {
+		perror("bind");
+		exit(EXIT_FAILURE);
+	}
+
+	timeout_begin(TIMEOUT);
+	do {
+		ret = connect(client_fd, (struct sockaddr *)&sa_server, sizeof(sa_server));
+		timeout_check("connect");
+	} while (ret < 0 && errno == EINTR);
+	timeout_end();
+
+	if (ret < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	return client_fd;
+}
+
 /* Connect to <cid, port> and return the file descriptor. */
 static int vsock_connect(unsigned int cid, unsigned int port, int type)
 {
@@ -104,6 +146,10 @@ static int vsock_connect(unsigned int cid, unsigned int port, int type)
 	control_expectln("LISTENING");
 
 	fd = socket(AF_VSOCK, type, 0);
+	if (fd < 0) {
+		perror("socket");
+		exit(EXIT_FAILURE);
+	}
 
 	timeout_begin(TIMEOUT);
 	do {
@@ -132,11 +178,8 @@ int vsock_seqpacket_connect(unsigned int cid, unsigned int port)
 	return vsock_connect(cid, port, SOCK_SEQPACKET);
 }
 
-/* Listen on <cid, port> and return the first incoming connection. The remote
- * address is stored to clientaddrp. clientaddrp may be NULL.
- */
-static int vsock_accept(unsigned int cid, unsigned int port,
-			struct sockaddr_vm *clientaddrp, int type)
+/* Listen on <cid, port> and return the file descriptor. */
+static int vsock_listen(unsigned int cid, unsigned int port, int type)
 {
 	union {
 		struct sockaddr sa;
@@ -148,16 +191,13 @@ static int vsock_accept(unsigned int cid, unsigned int port,
 			.svm_cid = cid,
 		},
 	};
-	union {
-		struct sockaddr sa;
-		struct sockaddr_vm svm;
-	} clientaddr;
-	socklen_t clientaddr_len = sizeof(clientaddr.svm);
 	int fd;
-	int client_fd;
-	int old_errno;
 
 	fd = socket(AF_VSOCK, type, 0);
+	if (fd < 0) {
+		perror("socket");
+		exit(EXIT_FAILURE);
+	}
 
 	if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
 		perror("bind");
@@ -169,6 +209,24 @@ static int vsock_accept(unsigned int cid, unsigned int port,
 		exit(EXIT_FAILURE);
 	}
 
+	return fd;
+}
+
+/* Listen on <cid, port> and return the first incoming connection. The remote
+ * address is stored to clientaddrp. clientaddrp may be NULL.
+ */
+static int vsock_accept(unsigned int cid, unsigned int port,
+			struct sockaddr_vm *clientaddrp, int type)
+{
+	union {
+		struct sockaddr sa;
+		struct sockaddr_vm svm;
+	} clientaddr;
+	socklen_t clientaddr_len = sizeof(clientaddr.svm);
+	int fd, client_fd, old_errno;
+
+	fd = vsock_listen(cid, port, type);
+
 	control_writeln("LISTENING");
 
 	timeout_begin(TIMEOUT);
@@ -207,6 +265,11 @@ int vsock_stream_accept(unsigned int cid, unsigned int port,
 	return vsock_accept(cid, port, clientaddrp, SOCK_STREAM);
 }
 
+int vsock_stream_listen(unsigned int cid, unsigned int port)
+{
+	return vsock_listen(cid, port, SOCK_STREAM);
+}
+
 int vsock_seqpacket_accept(unsigned int cid, unsigned int port,
 			   struct sockaddr_vm *clientaddrp)
 {
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index a77175d25864..03c88d0cb861 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -36,9 +36,12 @@ struct test_case {
 void init_signals(void);
 unsigned int parse_cid(const char *str);
 int vsock_stream_connect(unsigned int cid, unsigned int port);
+int vsock_bind_connect(unsigned int cid, unsigned int port,
+		       unsigned int bind_port, int type);
 int vsock_seqpacket_connect(unsigned int cid, unsigned int port);
 int vsock_stream_accept(unsigned int cid, unsigned int port,
 			struct sockaddr_vm *clientaddrp);
+int vsock_stream_listen(unsigned int cid, unsigned int port);
 int vsock_seqpacket_accept(unsigned int cid, unsigned int port,
 			   struct sockaddr_vm *clientaddrp);
 void vsock_wait_remote_close(int fd);
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index c1f7bc9abd22..5b0e93f9996c 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -1180,6 +1180,51 @@ static void test_stream_shutrd_server(const struct test_opts *opts)
 	close(fd);
 }
 
+static void test_double_bind_connect_server(const struct test_opts *opts)
+{
+	int listen_fd, client_fd, i;
+	struct sockaddr_vm sa_client;
+	socklen_t socklen_client = sizeof(sa_client);
+
+	listen_fd = vsock_stream_listen(VMADDR_CID_ANY, 1234);
+
+	for (i = 0; i < 2; i++) {
+		control_writeln("LISTENING");
+
+		timeout_begin(TIMEOUT);
+		do {
+			client_fd = accept(listen_fd, (struct sockaddr *)&sa_client,
+					   &socklen_client);
+			timeout_check("accept");
+		} while (client_fd < 0 && errno == EINTR);
+		timeout_end();
+
+		if (client_fd < 0) {
+			perror("accept");
+			exit(EXIT_FAILURE);
+		}
+
+		/* Waiting for remote peer to close connection */
+		vsock_wait_remote_close(client_fd);
+	}
+
+	close(listen_fd);
+}
+
+static void test_double_bind_connect_client(const struct test_opts *opts)
+{
+	int i, client_fd;
+
+	for (i = 0; i < 2; i++) {
+		/* Wait until server is ready to accept a new connection */
+		control_expectln("LISTENING");
+
+		client_fd = vsock_bind_connect(opts->peer_cid, 1234, 4321, SOCK_STREAM);
+
+		close(client_fd);
+	}
+}
+
 static struct test_case test_cases[] = {
 	{
 		.name = "SOCK_STREAM connection reset",
@@ -1285,6 +1330,11 @@ static struct test_case test_cases[] = {
 		.run_client = test_stream_msgzcopy_empty_errq_client,
 		.run_server = test_stream_msgzcopy_empty_errq_server,
 	},
+	{
+		.name = "SOCK_STREAM double bind connect",
+		.run_client = test_double_bind_connect_client,
+		.run_server = test_double_bind_connect_server,
+	},
 	{},
 };
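Editor's note on the last hunk above: the new "SOCK_STREAM double bind connect" test exercises an AF_VSOCK client that bind()s to a fixed local port before connect(), twice in a row, verifying that the port is released cleanly after each connection is closed. The following is a minimal standalone sketch of that client-side pattern, not part of the patch: the CID and port constants are illustrative (they mirror the test's 1234/4321 choices), and the control-channel handshake and EINTR retry loop of the real harness are omitted.

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>

/* Bind to a fixed client-side port, then connect to <cid, port>. */
static int bind_connect_once(unsigned int cid, unsigned int port,
			     unsigned int bind_port)
{
	struct sockaddr_vm local = {
		.svm_family = AF_VSOCK,
		.svm_cid = VMADDR_CID_ANY,	/* any local CID */
		.svm_port = bind_port,		/* fixed source port */
	};
	struct sockaddr_vm remote = {
		.svm_family = AF_VSOCK,
		.svm_cid = cid,
		.svm_port = port,
	};
	int fd = socket(AF_VSOCK, SOCK_STREAM, 0);

	if (fd < 0 ||
	    bind(fd, (struct sockaddr *)&local, sizeof(local)) ||
	    connect(fd, (struct sockaddr *)&remote, sizeof(remote))) {
		perror("socket/bind/connect");
		exit(EXIT_FAILURE);
	}
	return fd;
}

int main(void)
{
	/* CID 2 (the host) and ports 1234/4321 are assumed example values;
	 * a second bind to the same port only succeeds if the first
	 * connection released it cleanly on close().
	 */
	for (int i = 0; i < 2; i++)
		close(bind_connect_once(2, 1234, 4321));
	return 0;
}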