diff options
320 files changed, 6539 insertions, 3246 deletions
@@ -2090,7 +2090,7 @@ S: Kuala Lumpur, Malaysia N: Mohit Kumar D: ST Microelectronics SPEAr13xx PCI host bridge driver -D: Synopsys Designware PCI host bridge driver +D: Synopsys DesignWare PCI host bridge driver N: Gabor Kuti E: seasons@falcon.sch.bme.hu diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 86b0e8ec8ad7..05496622b4ef 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2764,6 +2764,15 @@ If the dependencies are under your control, you can turn on cpu0_hotplug. + nps_mtm_hs_ctr= [KNL,ARC] + This parameter sets the maximum duration, in + cycles, each HW thread of the CTOP can run + without interruptions, before HW switches it. + The actual maximum duration is 16 times this + parameter's value. + Format: integer between 1 and 255 + Default: 255 + nptcg= [IA-64] Override max number of concurrent global TLB purges which is reported from either PAL_VM_SUMMARY or SAL PALO. diff --git a/Documentation/devicetree/bindings/arc/hsdk.txt b/Documentation/devicetree/bindings/arc/hsdk.txt new file mode 100644 index 000000000000..be50654bbf61 --- /dev/null +++ b/Documentation/devicetree/bindings/arc/hsdk.txt @@ -0,0 +1,7 @@ +Synopsys DesignWare ARC HS Development Kit Device Tree Bindings +--------------------------------------------------------------------------- + +ARC HSDK Board with quad-core ARC HS38x4 in silicon. + +Required root node properties: + - compatible = "snps,hsdk"; diff --git a/Documentation/devicetree/bindings/pci/83xx-512x-pci.txt b/Documentation/devicetree/bindings/pci/83xx-512x-pci.txt index 35a465362408..b9165b72473c 100644 --- a/Documentation/devicetree/bindings/pci/83xx-512x-pci.txt +++ b/Documentation/devicetree/bindings/pci/83xx-512x-pci.txt @@ -1,11 +1,11 @@ * Freescale 83xx and 512x PCI bridges -Freescale 83xx and 512x SOCs include the same pci bridge core. +Freescale 83xx and 512x SOCs include the same PCI bridge core. 83xx/512x specific notes: - reg: should contain two address length tuples - The first is for the internal pci bridge registers - The second is for the pci config space access registers + The first is for the internal PCI bridge registers + The second is for the PCI config space access registers Example (MPC8313ERDB) pci0: pci@e0008500 { diff --git a/Documentation/devicetree/bindings/pci/altera-pcie.txt b/Documentation/devicetree/bindings/pci/altera-pcie.txt index 2951a6a50704..495880193adc 100644 --- a/Documentation/devicetree/bindings/pci/altera-pcie.txt +++ b/Documentation/devicetree/bindings/pci/altera-pcie.txt @@ -7,21 +7,21 @@ Required properties: "Txs": TX slave port region "Cra": Control register access region - interrupt-parent: interrupt source phandle. -- interrupts: specifies the interrupt source of the parent interrupt controller. - The format of the interrupt specifier depends on the parent interrupt - controller. +- interrupts: specifies the interrupt source of the parent interrupt + controller. The format of the interrupt specifier depends + on the parent interrupt controller. - device_type: must be "pci" - #address-cells: set to <3> -- #size-cells: set to <2> +- #size-cells: set to <2> - #interrupt-cells: set to <1> -- ranges: describes the translation of addresses for root ports and standard - PCI regions. +- ranges: describes the translation of addresses for root ports and + standard PCI regions. - interrupt-map-mask and interrupt-map: standard PCI properties to define the mapping of the PCIe interface to interrupt numbers. Optional properties: -- msi-parent: Link to the hardware entity that serves as the MSI controller for this PCIe - controller. +- msi-parent: Link to the hardware entity that serves as the MSI controller + for this PCIe controller. - bus-range: PCI bus numbers covered Example @@ -45,5 +45,5 @@ Example <0 0 0 3 &pcie_0 3>, <0 0 0 4 &pcie_0 4>; ranges = <0x82000000 0x00000000 0x00000000 0xc0000000 0x00000000 0x10000000 - 0x82000000 0x00000000 0x10000000 0xd0000000 0x00000000 0x10000000>; + 0x82000000 0x00000000 0x10000000 0xd0000000 0x00000000 0x10000000>; }; diff --git a/Documentation/devicetree/bindings/pci/axis,artpec6-pcie.txt b/Documentation/devicetree/bindings/pci/axis,artpec6-pcie.txt index 5ecaea1e6eee..4e4aee4439ea 100644 --- a/Documentation/devicetree/bindings/pci/axis,artpec6-pcie.txt +++ b/Documentation/devicetree/bindings/pci/axis,artpec6-pcie.txt @@ -6,7 +6,7 @@ and thus inherits all the common properties defined in designware-pcie.txt. Required properties: - compatible: "axis,artpec6-pcie", "snps,dw-pcie" - reg: base addresses and lengths of the PCIe controller (DBI), - the phy controller, and configuration address space. + the PHY controller, and configuration address space. - reg-names: Must include the following entries: - "dbi" - "phy" diff --git a/Documentation/devicetree/bindings/pci/designware-pcie.txt b/Documentation/devicetree/bindings/pci/designware-pcie.txt index b2480dd38c11..1da7ade3183c 100644 --- a/Documentation/devicetree/bindings/pci/designware-pcie.txt +++ b/Documentation/devicetree/bindings/pci/designware-pcie.txt @@ -1,4 +1,4 @@ -* Synopsys Designware PCIe interface +* Synopsys DesignWare PCIe interface Required properties: - compatible: should contain "snps,dw-pcie" to identify the core. @@ -17,29 +17,27 @@ RC mode: properties to define the mapping of the PCIe interface to interrupt numbers. EP mode: -- num-ib-windows: number of inbound address translation - windows -- num-ob-windows: number of outbound address translation - windows +- num-ib-windows: number of inbound address translation windows +- num-ob-windows: number of outbound address translation windows Optional properties: - num-lanes: number of lanes to use (this property should be specified unless the link is brought already up in BIOS) -- reset-gpio: gpio pin number of power good signal +- reset-gpio: GPIO pin number of power good signal - clocks: Must contain an entry for each entry in clock-names. See ../clocks/clock-bindings.txt for details. - clock-names: Must include the following entries: - "pcie" - "pcie_bus" RC mode: -- num-viewport: number of view ports configured in - hardware. If a platform does not specify it, the driver assumes 2. -- bus-range: PCI bus numbers covered (it is recommended - for new devicetrees to specify this property, to keep backwards - compatibility a range of 0x00-0xff is assumed if not present) +- num-viewport: number of view ports configured in hardware. If a platform + does not specify it, the driver assumes 2. +- bus-range: PCI bus numbers covered (it is recommended for new devicetrees + to specify this property, to keep backwards compatibility a range of + 0x00-0xff is assumed if not present) + EP mode: -- max-functions: maximum number of functions that can be - configured +- max-functions: maximum number of functions that can be configured Example configuration: diff --git a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt index cf92d3ba5a26..7b1e48bf172b 100644 --- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt +++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt @@ -1,6 +1,6 @@ * Freescale i.MX6 PCIe interface -This PCIe host controller is based on the Synopsis Designware PCIe IP +This PCIe host controller is based on the Synopsys DesignWare PCIe IP and thus inherits all the common properties defined in designware-pcie.txt. Required properties: diff --git a/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt b/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt index 7a579c816951..bdb7ab39d2d7 100644 --- a/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt +++ b/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt @@ -1,7 +1,7 @@ HiSilicon Hip05 and Hip06 PCIe host bridge DT description -HiSilicon PCIe host controller is based on Designware PCI core. -It shares common functions with PCIe Designware core driver and inherits +HiSilicon PCIe host controller is based on the Synopsys DesignWare PCI core. +It shares common functions with the PCIe DesignWare core driver and inherits common properties defined in Documentation/devicetree/bindings/pci/designware-pci.txt. diff --git a/Documentation/devicetree/bindings/pci/kirin-pcie.txt b/Documentation/devicetree/bindings/pci/kirin-pcie.txt index 68ffa0fbcd73..6e217c63123d 100644 --- a/Documentation/devicetree/bindings/pci/kirin-pcie.txt +++ b/Documentation/devicetree/bindings/pci/kirin-pcie.txt @@ -1,8 +1,8 @@ HiSilicon Kirin SoCs PCIe host DT description -Kirin PCIe host controller is based on Designware PCI core. -It shares common functions with PCIe Designware core driver -and inherits common properties defined in +Kirin PCIe host controller is based on the Synopsys DesignWare PCI core. +It shares common functions with the PCIe DesignWare core driver and +inherits common properties defined in Documentation/devicetree/bindings/pci/designware-pci.txt. Additional properties are described here: @@ -16,7 +16,7 @@ Required properties "apb": apb Ctrl register defined by Kirin; "phy": apb PHY register defined by Kirin; "config": PCIe configuration space registers. -- reset-gpios: The gpio to generate PCIe perst assert and deassert signal. +- reset-gpios: The GPIO to generate PCIe PERST# assert and deassert signal. Optional properties: diff --git a/Documentation/devicetree/bindings/pci/layerscape-pci.txt b/Documentation/devicetree/bindings/pci/layerscape-pci.txt index ee1c72d5162e..c0484da0f20d 100644 --- a/Documentation/devicetree/bindings/pci/layerscape-pci.txt +++ b/Documentation/devicetree/bindings/pci/layerscape-pci.txt @@ -15,8 +15,10 @@ Required properties: - compatible: should contain the platform identifier such as: "fsl,ls1021a-pcie", "snps,dw-pcie" "fsl,ls2080a-pcie", "fsl,ls2085a-pcie", "snps,dw-pcie" + "fsl,ls2088a-pcie" + "fsl,ls1088a-pcie" "fsl,ls1046a-pcie" -- reg: base addresses and lengths of the PCIe controller +- reg: base addresses and lengths of the PCIe controller register blocks. - interrupts: A list of interrupt outputs of the controller. Must contain an entry for each entry in the interrupt-names property. - interrupt-names: Must include the following entries: diff --git a/Documentation/devicetree/bindings/pci/mediatek,mt7623-pcie.txt b/Documentation/devicetree/bindings/pci/mediatek,mt7623-pcie.txt deleted file mode 100644 index fe80dda9bf73..000000000000 --- a/Documentation/devicetree/bindings/pci/mediatek,mt7623-pcie.txt +++ /dev/null @@ -1,130 +0,0 @@ -MediaTek Gen2 PCIe controller which is available on MT7623 series SoCs - -PCIe subsys supports single root complex (RC) with 3 Root Ports. Each root -ports supports a Gen2 1-lane Link and has PIPE interface to PHY. - -Required properties: -- compatible: Should contain "mediatek,mt7623-pcie". -- device_type: Must be "pci" -- reg: Base addresses and lengths of the PCIe controller. -- #address-cells: Address representation for root ports (must be 3) -- #size-cells: Size representation for root ports (must be 2) -- #interrupt-cells: Size representation for interrupts (must be 1) -- interrupt-map-mask and interrupt-map: Standard PCI IRQ mapping properties - Please refer to the standard PCI bus binding document for a more detailed - explanation. -- clocks: Must contain an entry for each entry in clock-names. - See ../clocks/clock-bindings.txt for details. -- clock-names: Must include the following entries: - - free_ck :for reference clock of PCIe subsys - - sys_ck0 :for clock of Port0 - - sys_ck1 :for clock of Port1 - - sys_ck2 :for clock of Port2 -- resets: Must contain an entry for each entry in reset-names. - See ../reset/reset.txt for details. -- reset-names: Must include the following entries: - - pcie-rst0 :port0 reset - - pcie-rst1 :port1 reset - - pcie-rst2 :port2 reset -- phys: List of PHY specifiers (used by generic PHY framework). -- phy-names : Must be "pcie-phy0", "pcie-phy1", "pcie-phyN".. based on the - number of PHYs as specified in *phys* property. -- power-domains: A phandle and power domain specifier pair to the power domain - which is responsible for collapsing and restoring power to the peripheral. -- bus-range: Range of bus numbers associated with this controller. -- ranges: Ranges for the PCI memory and I/O regions. - -In addition, the device tree node must have sub-nodes describing each -PCIe port interface, having the following mandatory properties: - -Required properties: -- device_type: Must be "pci" -- reg: Only the first four bytes are used to refer to the correct bus number - and device number. -- #address-cells: Must be 3 -- #size-cells: Must be 2 -- #interrupt-cells: Must be 1 -- interrupt-map-mask and interrupt-map: Standard PCI IRQ mapping properties - Please refer to the standard PCI bus binding document for a more detailed - explanation. -- ranges: Sub-ranges distributed from the PCIe controller node. An empty - property is sufficient. -- num-lanes: Number of lanes to use for this port. - -Examples: - - hifsys: syscon@1a000000 { - compatible = "mediatek,mt7623-hifsys", - "mediatek,mt2701-hifsys", - "syscon"; - reg = <0 0x1a000000 0 0x1000>; - #clock-cells = <1>; - #reset-cells = <1>; - }; - - pcie: pcie-controller@1a140000 { - compatible = "mediatek,mt7623-pcie"; - device_type = "pci"; - reg = <0 0x1a140000 0 0x1000>, /* PCIe shared registers */ - <0 0x1a142000 0 0x1000>, /* Port0 registers */ - <0 0x1a143000 0 0x1000>, /* Port1 registers */ - <0 0x1a144000 0 0x1000>; /* Port2 registers */ - #address-cells = <3>; - #size-cells = <2>; - #interrupt-cells = <1>; - interrupt-map-mask = <0xf800 0 0 0>; - interrupt-map = <0x0000 0 0 0 &sysirq GIC_SPI 193 IRQ_TYPE_LEVEL_LOW>, - <0x0800 0 0 0 &sysirq GIC_SPI 194 IRQ_TYPE_LEVEL_LOW>, - <0x1000 0 0 0 &sysirq GIC_SPI 195 IRQ_TYPE_LEVEL_LOW>; - clocks = <&topckgen CLK_TOP_ETHIF_SEL>, - <&hifsys CLK_HIFSYS_PCIE0>, - <&hifsys CLK_HIFSYS_PCIE1>, - <&hifsys CLK_HIFSYS_PCIE2>; - clock-names = "free_ck", "sys_ck0", "sys_ck1", "sys_ck2"; - resets = <&hifsys MT2701_HIFSYS_PCIE0_RST>, - <&hifsys MT2701_HIFSYS_PCIE1_RST>, - <&hifsys MT2701_HIFSYS_PCIE2_RST>; - reset-names = "pcie-rst0", "pcie-rst1", "pcie-rst2"; - phys = <&pcie0_phy>, <&pcie1_phy>, <&pcie2_phy>; - phy-names = "pcie-phy0", "pcie-phy1", "pcie-phy2"; - power-domains = <&scpsys MT2701_POWER_DOMAIN_HIF>; - bus-range = <0x00 0xff>; - ranges = <0x81000000 0 0x1a160000 0 0x1a160000 0 0x00010000 /* I/O space */ - 0x83000000 0 0x60000000 0 0x60000000 0 0x10000000>; /* memory space */ - - pcie@0,0 { - device_type = "pci"; - reg = <0x0000 0 0 0 0>; - #address-cells = <3>; - #size-cells = <2>; - #interrupt-cells = <1>; - interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &sysirq GIC_SPI 193 IRQ_TYPE_LEVEL_LOW>; - ranges; - num-lanes = <1>; - }; - - pcie@1,0 { - device_type = "pci"; - reg = <0x0800 0 0 0 0>; - #address-cells = <3>; - #size-cells = <2>; - #interrupt-cells = <1>; - interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &sysirq GIC_SPI 194 IRQ_TYPE_LEVEL_LOW>; - ranges; - num-lanes = <1>; - }; - - pcie@2,0 { - device_type = "pci"; - reg = <0x1000 0 0 0 0>; - #address-cells = <3>; - #size-cells = <2>; - #interrupt-cells = <1>; - interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &sysirq GIC_SPI 195 IRQ_TYPE_LEVEL_LOW>; - ranges; - num-lanes = <1>; - }; - }; diff --git a/Documentation/devicetree/bindings/pci/mediatek-pcie.txt b/Documentation/devicetree/bindings/pci/mediatek-pcie.txt new file mode 100644 index 000000000000..3a6ce55dd310 --- /dev/null +++ b/Documentation/devicetree/bindings/pci/mediatek-pcie.txt @@ -0,0 +1,284 @@ +MediaTek Gen2 PCIe controller + +Required properties: +- compatible: Should contain one of the following strings: + "mediatek,mt2701-pcie" + "mediatek,mt2712-pcie" + "mediatek,mt7622-pcie" + "mediatek,mt7623-pcie" +- device_type: Must be "pci" +- reg: Base addresses and lengths of the PCIe subsys and root ports. +- reg-names: Names of the above areas to use during resource lookup. +- #address-cells: Address representation for root ports (must be 3) +- #size-cells: Size representation for root ports (must be 2) +- clocks: Must contain an entry for each entry in clock-names. + See ../clocks/clock-bindings.txt for details. +- clock-names: + Mandatory entries: + - sys_ckN :transaction layer and data link layer clock + Required entries for MT2701/MT7623: + - free_ck :for reference clock of PCIe subsys + Required entries for MT2712/MT7622: + - ahb_ckN :AHB slave interface operating clock for CSR access and RC + initiated MMIO access + Required entries for MT7622: + - axi_ckN :application layer MMIO channel operating clock + - aux_ckN :pe2_mac_bridge and pe2_mac_core operating clock when + pcie_mac_ck/pcie_pipe_ck is turned off + - obff_ckN :OBFF functional block operating clock + - pipe_ckN :LTSSM and PHY/MAC layer operating clock + where N starting from 0 to one less than the number of root ports. +- phys: List of PHY specifiers (used by generic PHY framework). +- phy-names : Must be "pcie-phy0", "pcie-phy1", "pcie-phyN".. based on the + number of PHYs as specified in *phys* property. +- power-domains: A phandle and power domain specifier pair to the power domain + which is responsible for collapsing and restoring power to the peripheral. +- bus-range: Range of bus numbers associated with this controller. +- ranges: Ranges for the PCI memory and I/O regions. + +Required properties for MT7623/MT2701: +- #interrupt-cells: Size representation for interrupts (must be 1) +- interrupt-map-mask and interrupt-map: Standard PCI IRQ mapping properties + Please refer to the standard PCI bus binding document for a more detailed + explanation. +- resets: Must contain an entry for each entry in reset-names. + See ../reset/reset.txt for details. +- reset-names: Must be "pcie-rst0", "pcie-rst1", "pcie-rstN".. based on the + number of root ports. + +Required properties for MT2712/MT7622: +-interrupts: A list of interrupt outputs of the controller, must have one + entry for each PCIe port + +In addition, the device tree node must have sub-nodes describing each +PCIe port interface, having the following mandatory properties: + +Required properties: +- device_type: Must be "pci" +- reg: Only the first four bytes are used to refer to the correct bus number + and device number. +- #address-cells: Must be 3 +- #size-cells: Must be 2 +- #interrupt-cells: Must be 1 +- interrupt-map-mask and interrupt-map: Standard PCI IRQ mapping properties + Please refer to the standard PCI bus binding document for a more detailed + explanation. +- ranges: Sub-ranges distributed from the PCIe controller node. An empty + property is sufficient. +- num-lanes: Number of lanes to use for this port. + +Examples for MT7623: + + hifsys: syscon@1a000000 { + compatible = "mediatek,mt7623-hifsys", + "mediatek,mt2701-hifsys", + "syscon"; + reg = <0 0x1a000000 0 0x1000>; + #clock-cells = <1>; + #reset-cells = <1>; + }; + + pcie: pcie-controller@1a140000 { + compatible = "mediatek,mt7623-pcie"; + device_type = "pci"; + reg = <0 0x1a140000 0 0x1000>, /* PCIe shared registers */ + <0 0x1a142000 0 0x1000>, /* Port0 registers */ + <0 0x1a143000 0 0x1000>, /* Port1 registers */ + <0 0x1a144000 0 0x1000>; /* Port2 registers */ + reg-names = "subsys", "port0", "port1", "port2"; + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + interrupt-map-mask = <0xf800 0 0 0>; + interrupt-map = <0x0000 0 0 0 &sysirq GIC_SPI 193 IRQ_TYPE_LEVEL_LOW>, + <0x0800 0 0 0 &sysirq GIC_SPI 194 IRQ_TYPE_LEVEL_LOW>, + <0x1000 0 0 0 &sysirq GIC_SPI 195 IRQ_TYPE_LEVEL_LOW>; + clocks = <&topckgen CLK_TOP_ETHIF_SEL>, + <&hifsys CLK_HIFSYS_PCIE0>, + <&hifsys CLK_HIFSYS_PCIE1>, + <&hifsys CLK_HIFSYS_PCIE2>; + clock-names = "free_ck", "sys_ck0", "sys_ck1", "sys_ck2"; + resets = <&hifsys MT2701_HIFSYS_PCIE0_RST>, + <&hifsys MT2701_HIFSYS_PCIE1_RST>, + <&hifsys MT2701_HIFSYS_PCIE2_RST>; + reset-names = "pcie-rst0", "pcie-rst1", "pcie-rst2"; + phys = <&pcie0_phy PHY_TYPE_PCIE>, <&pcie1_phy PHY_TYPE_PCIE>, + <&pcie2_phy PHY_TYPE_PCIE>; + phy-names = "pcie-phy0", "pcie-phy1", "pcie-phy2"; + power-domains = <&scpsys MT2701_POWER_DOMAIN_HIF>; + bus-range = <0x00 0xff>; + ranges = <0x81000000 0 0x1a160000 0 0x1a160000 0 0x00010000 /* I/O space */ + 0x83000000 0 0x60000000 0 0x60000000 0 0x10000000>; /* memory space */ + + pcie@0,0 { + device_type = "pci"; + reg = <0x0000 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 0 0>; + interrupt-map = <0 0 0 0 &sysirq GIC_SPI 193 IRQ_TYPE_LEVEL_LOW>; + ranges; + num-lanes = <1>; + }; + + pcie@1,0 { + device_type = "pci"; + reg = <0x0800 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 0 0>; + interrupt-map = <0 0 0 0 &sysirq GIC_SPI 194 IRQ_TYPE_LEVEL_LOW>; + ranges; + num-lanes = <1>; + }; + + pcie@2,0 { + device_type = "pci"; + reg = <0x1000 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 0 0>; + interrupt-map = <0 0 0 0 &sysirq GIC_SPI 195 IRQ_TYPE_LEVEL_LOW>; + ranges; + num-lanes = <1>; + }; + }; + +Examples for MT2712: + pcie: pcie@11700000 { + compatible = "mediatek,mt2712-pcie"; + device_type = "pci"; + reg = <0 0x11700000 0 0x1000>, + <0 0x112ff000 0 0x1000>; + reg-names = "port0", "port1"; + #address-cells = <3>; + #size-cells = <2>; + interrupts = <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&topckgen CLK_TOP_PE2_MAC_P0_SEL>, + <&topckgen CLK_TOP_PE2_MAC_P1_SEL>, + <&pericfg CLK_PERI_PCIE0>, + <&pericfg CLK_PERI_PCIE1>; + clock-names = "sys_ck0", "sys_ck1", "ahb_ck0", "ahb_ck1"; + phys = <&pcie0_phy PHY_TYPE_PCIE>, <&pcie1_phy PHY_TYPE_PCIE>; + phy-names = "pcie-phy0", "pcie-phy1"; + bus-range = <0x00 0xff>; + ranges = <0x82000000 0 0x20000000 0x0 0x20000000 0 0x10000000>; + + pcie0: pcie@0,0 { + device_type = "pci"; + reg = <0x0000 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + ranges; + num-lanes = <1>; + interrupt-map-mask = <0 0 0 7>; + interrupt-map = <0 0 0 1 &pcie_intc0 0>, + <0 0 0 2 &pcie_intc0 1>, + <0 0 0 3 &pcie_intc0 2>, + <0 0 0 4 &pcie_intc0 3>; + pcie_intc0: interrupt-controller { + interrupt-controller; + #address-cells = <0>; + #interrupt-cells = <1>; + }; + }; + + pcie1: pcie@1,0 { + device_type = "pci"; + reg = <0x0800 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + ranges; + num-lanes = <1>; + interrupt-map-mask = <0 0 0 7>; + interrupt-map = <0 0 0 1 &pcie_intc1 0>, + <0 0 0 2 &pcie_intc1 1>, + <0 0 0 3 &pcie_intc1 2>, + <0 0 0 4 &pcie_intc1 3>; + pcie_intc1: interrupt-controller { + interrupt-controller; + #address-cells = <0>; + #interrupt-cells = <1>; + }; + }; + }; + +Examples for MT7622: + pcie: pcie@1a140000 { + compatible = "mediatek,mt7622-pcie"; + device_type = "pci"; + reg = <0 0x1a140000 0 0x1000>, + <0 0x1a143000 0 0x1000>, + <0 0x1a145000 0 0x1000>; + reg-names = "subsys", "port0", "port1"; + #address-cells = <3>; + #size-cells = <2>; + interrupts = <GIC_SPI 228 IRQ_TYPE_LEVEL_LOW>, + <GIC_SPI 229 IRQ_TYPE_LEVEL_LOW>; + clocks = <&pciesys CLK_PCIE_P0_MAC_EN>, + <&pciesys CLK_PCIE_P1_MAC_EN>, + <&pciesys CLK_PCIE_P0_AHB_EN>, + <&pciesys CLK_PCIE_P1_AHB_EN>, + <&pciesys CLK_PCIE_P0_AUX_EN>, + <&pciesys CLK_PCIE_P1_AUX_EN>, + <&pciesys CLK_PCIE_P0_AXI_EN>, + <&pciesys CLK_PCIE_P1_AXI_EN>, + <&pciesys CLK_PCIE_P0_OBFF_EN>, + <&pciesys CLK_PCIE_P1_OBFF_EN>, + <&pciesys CLK_PCIE_P0_PIPE_EN>, + <&pciesys CLK_PCIE_P1_PIPE_EN>; + clock-names = "sys_ck0", "sys_ck1", "ahb_ck0", "ahb_ck1", + "aux_ck0", "aux_ck1", "axi_ck0", "axi_ck1", + "obff_ck0", "obff_ck1", "pipe_ck0", "pipe_ck1"; + phys = <&pcie0_phy PHY_TYPE_PCIE>, <&pcie1_phy PHY_TYPE_PCIE>; + phy-names = "pcie-phy0", "pcie-phy1"; + power-domains = <&scpsys MT7622_POWER_DOMAIN_HIF0>; + bus-range = <0x00 0xff>; + ranges = <0x82000000 0 0x20000000 0x0 0x20000000 0 0x10000000>; + + pcie0: pcie@0,0 { + device_type = "pci"; + reg = <0x0000 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + ranges; + num-lanes = <1>; + interrupt-map-mask = <0 0 0 7>; + interrupt-map = <0 0 0 1 &pcie_intc0 0>, + <0 0 0 2 &pcie_intc0 1>, + <0 0 0 3 &pcie_intc0 2>, + <0 0 0 4 &pcie_intc0 3>; + pcie_intc0: interrupt-controller { + interrupt-controller; + #address-cells = <0>; + #interrupt-cells = <1>; + }; + }; + + pcie1: pcie@1,0 { + device_type = "pci"; + reg = <0x0800 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + ranges; + num-lanes = <1>; + interrupt-map-mask = <0 0 0 7>; + interrupt-map = <0 0 0 1 &pcie_intc1 0>, + <0 0 0 2 &pcie_intc1 1>, + <0 0 0 3 &pcie_intc1 2>, + <0 0 0 4 &pcie_intc1 3>; + pcie_intc1: interrupt-controller { + interrupt-controller; + #address-cells = <0>; + #interrupt-cells = <1>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/pci/mvebu-pci.txt b/Documentation/devicetree/bindings/pci/mvebu-pci.txt index 9c7fce69570b..127ae1f53e5a 100644 --- a/Documentation/devicetree/bindings/pci/mvebu-pci.txt +++ b/Documentation/devicetree/bindings/pci/mvebu-pci.txt @@ -77,7 +77,7 @@ and the following optional properties: - marvell,pcie-lane: the physical PCIe lane number, for ports having multiple lanes. If this property is not found, we assume that the value is 0. -- reset-gpios: optional gpio to PERST# +- reset-gpios: optional GPIO to PERST# - reset-delay-us: delay in us to wait after reset de-assertion, if not specified will default to 100ms, as required by the PCIe specification. diff --git a/Documentation/devicetree/bindings/pci/pci-armada8k.txt b/Documentation/devicetree/bindings/pci/pci-armada8k.txt index 6b115fd10fec..c1e4c3d10a74 100644 --- a/Documentation/devicetree/bindings/pci/pci-armada8k.txt +++ b/Documentation/devicetree/bindings/pci/pci-armada8k.txt @@ -1,6 +1,6 @@ * Marvell Armada 7K/8K PCIe interface -This PCIe host controller is based on the Synopsis Designware PCIe IP +This PCIe host controller is based on the Synopsys DesignWare PCIe IP and thus inherits all the common properties defined in designware-pcie.txt. Required properties: diff --git a/Documentation/devicetree/bindings/pci/pci-keystone.txt b/Documentation/devicetree/bindings/pci/pci-keystone.txt index d08a4d51108f..7e05487544ed 100644 --- a/Documentation/devicetree/bindings/pci/pci-keystone.txt +++ b/Documentation/devicetree/bindings/pci/pci-keystone.txt @@ -1,12 +1,12 @@ TI Keystone PCIe interface -Keystone PCI host Controller is based on Designware PCI h/w version 3.65. -It shares common functions with PCIe Designware core driver and inherit -common properties defined in +Keystone PCI host Controller is based on the Synopsys DesignWare PCI +hardware version 3.65. It shares common functions with the PCIe DesignWare +core driver and inherits common properties defined in Documentation/devicetree/bindings/pci/designware-pci.txt Please refer to Documentation/devicetree/bindings/pci/designware-pci.txt -for the details of Designware DT bindings. Additional properties are +for the details of DesignWare DT bindings. Additional properties are described here as well as properties that are not applicable. Required Properties:- @@ -52,13 +52,12 @@ pcie_intc: Interrupt controller device node for Legacy IRQ chip }; Optional properties:- - phys: phandle to Generic Keystone SerDes phy for PCI - phy-names: name of the Generic Keystine SerDes phy for PCI + phys: phandle to generic Keystone SerDes PHY for PCI + phy-names: name of the generic Keystone SerDes PHY for PCI - If boot loader already does PCI link establishment, then phys and phy-names shouldn't be present. interrupts: platform interrupt for error interrupts. -Designware DT Properties not applicable for Keystone PCI +DesignWare DT Properties not applicable for Keystone PCI 1. pcie_bus clock-names not used. Instead, a phandle to phys is used. - diff --git a/Documentation/devicetree/bindings/pci/pci-rcar-gen2.txt b/Documentation/devicetree/bindings/pci/pci-rcar-gen2.txt index 07a75094c5a8..3d038638612b 100644 --- a/Documentation/devicetree/bindings/pci/pci-rcar-gen2.txt +++ b/Documentation/devicetree/bindings/pci/pci-rcar-gen2.txt @@ -6,11 +6,14 @@ AHB. There is one bridge instance per USB port connected to the internal OHCI and EHCI controllers. Required properties: -- compatible: "renesas,pci-r8a7790" for the R8A7790 SoC; +- compatible: "renesas,pci-r8a7743" for the R8A7743 SoC; + "renesas,pci-r8a7745" for the R8A7745 SoC; + "renesas,pci-r8a7790" for the R8A7790 SoC; "renesas,pci-r8a7791" for the R8A7791 SoC; "renesas,pci-r8a7793" for the R8A7793 SoC; "renesas,pci-r8a7794" for the R8A7794 SoC; - "renesas,pci-rcar-gen2" for a generic R-Car Gen2 compatible device + "renesas,pci-rcar-gen2" for a generic R-Car Gen2 or + RZ/G1 compatible device. When compatible with the generic version, nodes must list the diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie.txt b/Documentation/devicetree/bindings/pci/qcom,pcie.txt index 9d418b71774f..3c9d321b3d3b 100644 --- a/Documentation/devicetree/bindings/pci/qcom,pcie.txt +++ b/Documentation/devicetree/bindings/pci/qcom,pcie.txt @@ -9,6 +9,7 @@ - "qcom,pcie-apq8084" for apq8084 - "qcom,pcie-msm8996" for msm8996 or apq8096 - "qcom,pcie-ipq4019" for ipq4019 + - "qcom,pcie-ipq8074" for ipq8074 - reg: Usage: required @@ -20,7 +21,7 @@ Value type: <stringlist> Definition: Must include the following entries - "parf" Qualcomm specific registers - - "dbi" Designware PCIe registers + - "dbi" DesignWare PCIe registers - "elbi" External local bus interface registers - "config" PCIe configuration space @@ -105,6 +106,16 @@ - "bus_master" Master AXI clock - "bus_slave" Slave AXI clock +- clock-names: + Usage: required for ipq8074 + Value type: <stringlist> + Definition: Should contain the following entries + - "iface" PCIe to SysNOC BIU clock + - "axi_m" AXI Master clock + - "axi_s" AXI Slave clock + - "ahb" AHB clock + - "aux" Auxiliary clock + - resets: Usage: required Value type: <prop-encoded-array> @@ -144,6 +155,18 @@ - "ahb" AHB reset - "phy_ahb" PHY AHB reset +- reset-names: + Usage: required for ipq8074 + Value type: <stringlist> + Definition: Should contain the following entries + - "pipe" PIPE reset + - "sleep" Sleep reset + - "sticky" Core Sticky reset + - "axi_m" AXI Master reset + - "axi_s" AXI Slave reset + - "ahb" AHB Reset + - "axi_m_sticky" AXI Master Sticky reset + - power-domains: Usage: required for apq8084 and msm8996/apq8096 Value type: <prop-encoded-array> @@ -180,7 +203,7 @@ - <name>-gpios: Usage: optional Value type: <prop-encoded-array> - Definition: List of phandle and gpio specifier pairs. Should contain + Definition: List of phandle and GPIO specifier pairs. Should contain - "perst-gpios" PCIe endpoint reset signal line - "wake-gpios" PCIe endpoint wake signal line diff --git a/Documentation/devicetree/bindings/pci/ralink,rt3883-pci.txt b/Documentation/devicetree/bindings/pci/ralink,rt3883-pci.txt index 8e0a1eb0acbb..a04ab1b76211 100644 --- a/Documentation/devicetree/bindings/pci/ralink,rt3883-pci.txt +++ b/Documentation/devicetree/bindings/pci/ralink,rt3883-pci.txt @@ -71,7 +71,7 @@ - interrupt-map: standard PCI properties to define the mapping of the PCI interface to interrupt numbers. - The PCI host bridge node migh have additional sub-nodes representing + The PCI host bridge node might have additional sub-nodes representing the onboard PCI devices/PCI slots. Each such sub-node must have the following mandatory properties: diff --git a/Documentation/devicetree/bindings/pci/rcar-pci.txt b/Documentation/devicetree/bindings/pci/rcar-pci.txt index a7e3dd43b2a8..76ba3a61d1a3 100644 --- a/Documentation/devicetree/bindings/pci/rcar-pci.txt +++ b/Documentation/devicetree/bindings/pci/rcar-pci.txt @@ -14,7 +14,7 @@ compatible: "renesas,pcie-r8a7779" for the R8A7779 SoC; SoC-specific version corresponding to the platform first followed by the generic version. -- reg: base address and length of the pcie controller registers. +- reg: base address and length of the PCIe controller registers. - #address-cells: set to <3> - #size-cells: set to <2> - bus-range: PCI bus numbers covered @@ -25,15 +25,14 @@ compatible: "renesas,pcie-r8a7779" for the R8A7779 SoC; source for hardware related interrupts (e.g. link speed change). - #interrupt-cells: set to <1> - interrupt-map-mask and interrupt-map: standard PCI properties - to define the mapping of the PCIe interface to interrupt - numbers. + to define the mapping of the PCIe interface to interrupt numbers. - clocks: from common clock binding: clock specifiers for the PCIe controller and PCIe bus clocks. - clock-names: from common clock binding: should be "pcie" and "pcie_bus". Example: -SoC specific DT Entry: +SoC-specific DT Entry: pcie: pcie@fe000000 { compatible = "renesas,pcie-r8a7791", "renesas,pcie-rcar-gen2"; diff --git a/Documentation/devicetree/bindings/pci/rockchip-pcie.txt b/Documentation/devicetree/bindings/pci/rockchip-pcie.txt index 1453a734c2f5..af34c65773fd 100644 --- a/Documentation/devicetree/bindings/pci/rockchip-pcie.txt +++ b/Documentation/devicetree/bindings/pci/rockchip-pcie.txt @@ -19,8 +19,6 @@ Required properties: - "pm" - msi-map: Maps a Requester ID to an MSI controller and associated msi-specifier data. See ./pci-msi.txt -- phys: From PHY bindings: Phandle for the Generic PHY for PCIe. -- phy-names: MUST be "pcie-phy". - interrupts: Three interrupt entries must be specified. - interrupt-names: Must include the following names - "sys" @@ -42,11 +40,24 @@ Required properties: interrupt source. The value must be 1. - interrupt-map-mask and interrupt-map: standard PCI properties +Required properties for legacy PHY model (deprecated): +- phys: From PHY bindings: Phandle for the Generic PHY for PCIe. +- phy-names: MUST be "pcie-phy". + +Required properties for per-lane PHY model (preferred): +- phys: Must contain an phandle to a PHY for each entry in phy-names. +- phy-names: Must include 4 entries for all 4 lanes even if some of + them won't be used for your cases. Entries are of the form "pcie-phy-N": + where N ranges from 0 to 3. + (see example below and you MUST also refer to ../phy/rockchip-pcie-phy.txt + for changing the #phy-cells of phy node to support it) + Optional Property: - aspm-no-l0s: RC won't support ASPM L0s. This property is needed if using 24MHz OSC for RC's PHY. -- ep-gpios: contain the entry for pre-reset gpio +- ep-gpios: contain the entry for pre-reset GPIO - num-lanes: number of lanes to use +- vpcie12v-supply: The phandle to the 12v regulator to use for PCIe. - vpcie3v3-supply: The phandle to the 3.3v regulator to use for PCIe. - vpcie1v8-supply: The phandle to the 1.8v regulator to use for PCIe. - vpcie0v9-supply: The phandle to the 0.9v regulator to use for PCIe. @@ -95,6 +106,7 @@ pcie0: pcie@f8000000 { <&cru SRST_PCIE_PM>, <&cru SRST_P_PCIE>, <&cru SRST_A_PCIE>; reset-names = "core", "mgmt", "mgmt-sticky", "pipe", "pm", "pclk", "aclk"; + /* deprecated legacy PHY model */ phys = <&pcie_phy>; phy-names = "pcie-phy"; pinctrl-names = "default"; @@ -111,3 +123,13 @@ pcie0: pcie@f8000000 { #interrupt-cells = <1>; }; }; + +pcie0: pcie@f8000000 { + ... + + /* preferred per-lane PHY model */ + phys = <&pcie_phy 0>, <&pcie_phy 1>, <&pcie_phy 2>, <&pcie_phy 3>; + phy-names = "pcie-phy-0", "pcie-phy-1", "pcie-phy-2", "pcie-phy-3"; + + ... +}; diff --git a/Documentation/devicetree/bindings/pci/samsung,exynos5440-pcie.txt b/Documentation/devicetree/bindings/pci/samsung,exynos5440-pcie.txt index 7d3b09474657..34a11bfbfb60 100644 --- a/Documentation/devicetree/bindings/pci/samsung,exynos5440-pcie.txt +++ b/Documentation/devicetree/bindings/pci/samsung,exynos5440-pcie.txt @@ -1,29 +1,29 @@ * Samsung Exynos 5440 PCIe interface -This PCIe host controller is based on the Synopsis Designware PCIe IP +This PCIe host controller is based on the Synopsys DesignWare PCIe IP and thus inherits all the common properties defined in designware-pcie.txt. Required properties: - compatible: "samsung,exynos5440-pcie" -- reg: base addresses and lengths of the pcie controller, - the phy controller, additional register for the phy controller. - (Registers for the phy controller are DEPRECATED. +- reg: base addresses and lengths of the PCIe controller, + the PHY controller, additional register for the PHY controller. + (Registers for the PHY controller are DEPRECATED. Use the PHY framework.) - reg-names : First name should be set to "elbi". - And use the "config" instead of getting the confgiruation address space + And use the "config" instead of getting the configuration address space from "ranges". - NOTE: When use the "config" property, reg-names must be set. + NOTE: When using the "config" property, reg-names must be set. - interrupts: A list of interrupt outputs for level interrupt, pulse interrupt, special interrupt. -- phys: From PHY binding. Phandle for the Generic PHY. +- phys: From PHY binding. Phandle for the generic PHY. Refer to Documentation/devicetree/bindings/phy/samsung-phy.txt -Other common properties refer to - Documentation/devicetree/binding/pci/designware-pcie.txt +For other common properties, refer to + Documentation/devicetree/bindings/pci/designware-pcie.txt Example: -SoC specific DT Entry: +SoC-specific DT Entry: pcie@290000 { compatible = "samsung,exynos5440-pcie", "snps,dw-pcie"; @@ -83,7 +83,7 @@ With using PHY framework: ... }; -Board specific DT Entry: +Board-specific DT Entry: pcie@290000 { reset-gpio = <&pin_ctrl 5 0>; diff --git a/Documentation/devicetree/bindings/pci/spear13xx-pcie.txt b/Documentation/devicetree/bindings/pci/spear13xx-pcie.txt index 49ea76da7718..d5a14f5dad46 100644 --- a/Documentation/devicetree/bindings/pci/spear13xx-pcie.txt +++ b/Documentation/devicetree/bindings/pci/spear13xx-pcie.txt @@ -1,12 +1,12 @@ SPEAr13XX PCIe DT detail: ================================ -SPEAr13XX uses synopsis designware PCIe controller and ST MiPHY as phy +SPEAr13XX uses the Synopsys DesignWare PCIe controller and ST MiPHY as PHY controller. Required properties: -- compatible : should be "st,spear1340-pcie", "snps,dw-pcie". -- phys : phandle to phy node associated with pcie controller +- compatible : should be "st,spear1340-pcie", "snps,dw-pcie". +- phys : phandle to PHY node associated with PCIe controller - phy-names : must be "pcie-phy" - All other definitions as per generic PCI bindings diff --git a/Documentation/devicetree/bindings/pci/ti-pci.txt b/Documentation/devicetree/bindings/pci/ti-pci.txt index 6a07c96227e0..7f7af3044016 100644 --- a/Documentation/devicetree/bindings/pci/ti-pci.txt +++ b/Documentation/devicetree/bindings/pci/ti-pci.txt @@ -1,6 +1,6 @@ TI PCI Controllers -PCIe Designware Controller +PCIe DesignWare Controller - compatible: Should be "ti,dra7-pcie" for RC Should be "ti,dra7-pcie-ep" for EP - phys : list of PHY specifiers (used by generic PHY framework) @@ -13,7 +13,7 @@ PCIe Designware Controller HOST MODE ========= - reg : Two register ranges as listed in the reg-names property - - reg-names : The first entry must be "ti-conf" for the TI specific registers + - reg-names : The first entry must be "ti-conf" for the TI-specific registers The second entry must be "rc-dbics" for the DesignWare PCIe registers The third entry must be "config" for the PCIe configuration space @@ -30,7 +30,7 @@ HOST MODE DEVICE MODE =========== - reg : Four register ranges as listed in the reg-names property - - reg-names : "ti-conf" for the TI specific registers + - reg-names : "ti-conf" for the TI-specific registers "ep_dbics" for the standard configuration registers as they are locally accessed within the DIF CS space "ep_dbics2" for the standard configuration registers as @@ -46,7 +46,7 @@ DEVICE MODE access. Optional Property: - - gpios : Should be added if a gpio line is required to drive PERST# line + - gpios : Should be added if a GPIO line is required to drive PERST# line NOTE: Two DT nodes may be added for each PCI controller; one for host mode and another for device mode. So in order for PCI to diff --git a/Documentation/devicetree/bindings/pci/versatile.txt b/Documentation/devicetree/bindings/pci/versatile.txt index ebd1e7d0403e..0a702b13d2ac 100644 --- a/Documentation/devicetree/bindings/pci/versatile.txt +++ b/Documentation/devicetree/bindings/pci/versatile.txt @@ -5,7 +5,7 @@ PCI host controller found on the ARM Versatile PB board's FPGA. Required properties: - compatible: should contain "arm,versatile-pci" to identify the Versatile PCI controller. -- reg: base addresses and lengths of the pci controller. There must be 3 +- reg: base addresses and lengths of the PCI controller. There must be 3 entries: - Versatile-specific registers - Self Config space diff --git a/Documentation/devicetree/bindings/pci/xgene-pci-msi.txt b/Documentation/devicetree/bindings/pci/xgene-pci-msi.txt index 54bac7f8860c..85d9b95234f7 100644 --- a/Documentation/devicetree/bindings/pci/xgene-pci-msi.txt +++ b/Documentation/devicetree/bindings/pci/xgene-pci-msi.txt @@ -4,7 +4,7 @@ Required properties: - compatible: should be "apm,xgene1-msi" to identify X-Gene v1 PCIe MSI controller block. -- msi-controller: indicates that this is X-Gene v1 PCIe MSI controller node +- msi-controller: indicates that this is an X-Gene v1 PCIe MSI controller node - reg: physical base address (0x79000000) and length (0x900000) for controller registers. These registers include the MSI termination address and data registers as well as the MSI interrupt status registers. @@ -13,7 +13,8 @@ Required properties: interrupt number 0x10 to 0x1f. - interrupt-names: not required -Each PCIe node needs to have property msi-parent that points to msi controller node +Each PCIe node needs to have property msi-parent that points to an MSI +controller node Examples: diff --git a/Documentation/devicetree/bindings/pci/xgene-pci.txt b/Documentation/devicetree/bindings/pci/xgene-pci.txt index 1070b068c7c6..6fd2decfa66c 100644 --- a/Documentation/devicetree/bindings/pci/xgene-pci.txt +++ b/Documentation/devicetree/bindings/pci/xgene-pci.txt @@ -8,7 +8,7 @@ Required properties: property. - reg-names: Must include the following entries: "csr": controller configuration registers. - "cfg": pcie configuration space registers. + "cfg": PCIe configuration space registers. - #address-cells: set to <3> - #size-cells: set to <2> - ranges: ranges for the outbound memory, I/O regions. @@ -21,11 +21,11 @@ Required properties: Optional properties: - status: Either "ok" or "disabled". -- dma-coherent: Present if dma operations are coherent +- dma-coherent: Present if DMA operations are coherent Example: -SoC specific DT Entry: +SoC-specific DT Entry: pcie0: pcie@1f2b0000 { status = "disabled"; @@ -51,7 +51,7 @@ SoC specific DT Entry: }; -Board specific DT Entry: +Board-specific DT Entry: &pcie0 { status = "ok"; }; diff --git a/Documentation/devicetree/bindings/pci/xilinx-nwl-pcie.txt b/Documentation/devicetree/bindings/pci/xilinx-nwl-pcie.txt index 3259798a1192..01bf7fdf4c19 100644 --- a/Documentation/devicetree/bindings/pci/xilinx-nwl-pcie.txt +++ b/Documentation/devicetree/bindings/pci/xilinx-nwl-pcie.txt @@ -15,9 +15,9 @@ Required properties: - device_type: must be "pci" - interrupts: Should contain NWL PCIe interrupt - interrupt-names: Must include the following entries: - "msi1, msi0": interrupt asserted when MSI is received + "msi1, msi0": interrupt asserted when an MSI is received "intx": interrupt asserted when a legacy interrupt is received - "misc": interrupt asserted when miscellaneous is received + "misc": interrupt asserted when miscellaneous interrupt is received - interrupt-map-mask and interrupt-map: standard PCI properties to define the mapping of the PCI interface to interrupt numbers. - ranges: ranges for the PCI memory regions (I/O space region is not @@ -26,7 +26,8 @@ Required properties: detailed explanation - msi-controller: indicates that this is MSI controller node - msi-parent: MSI parent of the root complex itself -- legacy-interrupt-controller: Interrupt controller device node for Legacy interrupts +- legacy-interrupt-controller: Interrupt controller device node for Legacy + interrupts - interrupt-controller: identifies the node as an interrupt controller - #interrupt-cells: should be set to 1 - #address-cells: specifies the number of cells needed to encode an diff --git a/Documentation/devicetree/bindings/phy/rockchip-pcie-phy.txt b/Documentation/devicetree/bindings/phy/rockchip-pcie-phy.txt index 0f6222a672ce..b496042f1f44 100644 --- a/Documentation/devicetree/bindings/phy/rockchip-pcie-phy.txt +++ b/Documentation/devicetree/bindings/phy/rockchip-pcie-phy.txt @@ -3,7 +3,6 @@ Rockchip PCIE PHY Required properties: - compatible: rockchip,rk3399-pcie-phy - - #phy-cells: must be 0 - clocks: Must contain an entry in clock-names. See ../clocks/clock-bindings.txt for details. - clock-names: Must be "refclk" @@ -11,6 +10,12 @@ Required properties: See ../reset/reset.txt for details. - reset-names: Must be "phy" +Required properties for legacy PHY mode (deprecated): + - #phy-cells: must be 0 + +Required properties for per-lane PHY mode (preferred): + - #phy-cells: must be 1 + Example: grf: syscon@ff770000 { diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt index ba2e7d254842..00b6dfed573c 100644 --- a/Documentation/laptops/thinkpad-acpi.txt +++ b/Documentation/laptops/thinkpad-acpi.txt @@ -121,8 +121,9 @@ space, for 2.6.23+ this is /sys/devices/platform/thinkpad_acpi/. Sysfs device attributes for the sensors and fan are on the thinkpad_hwmon device's sysfs attribute space, but you should locate it looking for a hwmon device with the name attribute of "thinkpad", or -better yet, through libsensors. - +better yet, through libsensors. For 4.14+ sysfs attributes were moved to the +hwmon device (/sys/bus/platform/devices/thinkpad_hwmon/hwmon/hwmon? or +/sys/class/hwmon/hwmon?). Driver version -------------- @@ -1478,3 +1479,7 @@ Sysfs interface changelog: 0x020700: Support for mute-only mixers. Volume control in read-only mode by default. Marker for ALSA mixer support. + +0x030000: Thermal and fan sysfs attributes were moved to the hwmon + device instead of being attached to the backing platform + device. diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt index b2f60ca8b60c..b3ce12643553 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic.txt +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt @@ -83,6 +83,11 @@ Groups: Bits for undefined preemption levels are RAZ/WI. + Note that this differs from a CPU's view of the APRs on hardware in which + a GIC without the security extensions expose group 0 and group 1 active + priorities in separate register groups, whereas we show a combined view + similar to GICv2's GICH_APR. + For historical reasons and to provide ABI compatibility with userspace we export the GICC_PMR register in the format of the GICH_VMCR.VMPriMask field in the lower 5 bits of a word, meaning that userspace must always diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt index 903fc926860b..95ca68d663a4 100644 --- a/Documentation/virtual/kvm/devices/vm.txt +++ b/Documentation/virtual/kvm/devices/vm.txt @@ -176,7 +176,8 @@ Architectures: s390 3.1. ATTRIBUTE: KVM_S390_VM_TOD_HIGH -Allows user space to set/get the TOD clock extension (u8). +Allows user space to set/get the TOD clock extension (u8) (superseded by +KVM_S390_VM_TOD_EXT). Parameters: address of a buffer in user space to store the data (u8) to Returns: -EFAULT if the given address is not accessible from kernel space @@ -190,6 +191,17 @@ the POP (u64). Parameters: address of a buffer in user space to store the data (u64) to Returns: -EFAULT if the given address is not accessible from kernel space +3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT +Allows user space to set/get bits 0-63 of the TOD clock register as defined in +the POP (u64). If the guest CPU model supports the TOD clock extension (u8), it +also allows user space to get/set it. If the guest CPU model does not support +it, it is stored as 0 and not allowed to be set to a value != 0. + +Parameters: address of a buffer in user space to store the data + (kvm_s390_vm_tod_clock) to +Returns: -EFAULT if the given address is not accessible from kernel space + -EINVAL if setting the TOD clock extension to != 0 is not supported + 4. GROUP: KVM_S390_VM_CRYPTO Architectures: s390 diff --git a/MAINTAINERS b/MAINTAINERS index 109c5d9a04c4..7f32b510fdea 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7471,18 +7471,30 @@ L: kvm@vger.kernel.org W: http://www.linux-kvm.org T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git S: Supported -F: Documentation/*/kvm*.txt F: Documentation/virtual/kvm/ -F: arch/*/kvm/ -F: arch/x86/kernel/kvm.c -F: arch/x86/kernel/kvmclock.c -F: arch/*/include/asm/kvm* -F: include/linux/kvm* +F: include/trace/events/kvm.h +F: include/uapi/asm-generic/kvm* F: include/uapi/linux/kvm* -F: virt/kvm/ +F: include/asm-generic/kvm* +F: include/linux/kvm* +F: include/kvm/iodev.h +F: virt/kvm/* F: tools/kvm/ -KERNEL VIRTUAL MACHINE (KVM) FOR AMD-V +KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86) +M: Paolo Bonzini <pbonzini@redhat.com> +M: Radim KrÄmář <rkrcmar@redhat.com> +L: kvm@vger.kernel.org +W: http://www.linux-kvm.org +T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git +S: Supported +F: arch/x86/kvm/ +F: arch/x86/include/uapi/asm/kvm* +F: arch/x86/include/asm/kvm* +F: arch/x86/kernel/kvm.c +F: arch/x86/kernel/kvmclock.c + +KERNEL VIRTUAL MACHINE FOR AMD-V (KVM/amd) M: Joerg Roedel <joro@8bytes.org> L: kvm@vger.kernel.org W: http://www.linux-kvm.org/ @@ -7490,7 +7502,7 @@ S: Maintained F: arch/x86/include/asm/svm.h F: arch/x86/kvm/svm.c -KERNEL VIRTUAL MACHINE (KVM) FOR ARM +KERNEL VIRTUAL MACHINE FOR ARM (KVM/arm) M: Christoffer Dall <christoffer.dall@linaro.org> M: Marc Zyngier <marc.zyngier@arm.com> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@ -7504,14 +7516,16 @@ F: arch/arm/kvm/ F: virt/kvm/arm/ F: include/kvm/arm_* -KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC +KERNEL VIRTUAL MACHINE FOR POWERPC (KVM/powerpc) M: Alexander Graf <agraf@suse.com> L: kvm-ppc@vger.kernel.org W: http://www.linux-kvm.org/ T: git git://github.com/agraf/linux-2.6.git S: Supported +F: arch/powerpc/include/uapi/asm/kvm* F: arch/powerpc/include/asm/kvm* F: arch/powerpc/kvm/ +F: arch/powerpc/kernel/kvm* KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64) M: Christoffer Dall <christoffer.dall@linaro.org> @@ -7538,7 +7552,8 @@ L: linux-s390@vger.kernel.org W: http://www.ibm.com/developerworks/linux/linux390/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git S: Supported -F: Documentation/s390/kvm.txt +F: arch/s390/include/uapi/asm/kvm* +F: arch/s390/include/asm/gmap.h F: arch/s390/include/asm/kvm* F: arch/s390/kvm/ F: arch/s390/mm/gmap.c @@ -10243,6 +10258,7 @@ F: drivers/pci/dwc/*imx6* PCI DRIVER FOR INTEL VOLUME MANAGEMENT DEVICE (VMD) M: Keith Busch <keith.busch@intel.com> +M: Jonathan Derrick <jonathan.derrick@intel.com> L: linux-pci@vger.kernel.org S: Supported F: drivers/pci/host/vmd.c @@ -10289,7 +10305,7 @@ L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers) S: Maintained F: drivers/pci/dwc/pci-exynos.c -PCI DRIVER FOR SYNOPSIS DESIGNWARE +PCI DRIVER FOR SYNOPSYS DESIGNWARE M: Jingoo Han <jingoohan1@gmail.com> M: Joao Pinto <Joao.Pinto@synopsys.com> L: linux-pci@vger.kernel.org diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c index 8322df174bbf..564114eb85e1 100644 --- a/arch/alpha/kernel/pci.c +++ b/arch/alpha/kernel/pci.c @@ -312,8 +312,9 @@ common_init_pci(void) { struct pci_controller *hose; struct list_head resources; + struct pci_host_bridge *bridge; struct pci_bus *bus; - int next_busno; + int ret, next_busno; int need_domain_info = 0; u32 pci_mem_end; u32 sg_base; @@ -336,11 +337,25 @@ common_init_pci(void) pci_add_resource_offset(&resources, hose->mem_space, hose->mem_space->start); - bus = pci_scan_root_bus(NULL, next_busno, alpha_mv.pci_ops, - hose, &resources); - if (!bus) + bridge = pci_alloc_host_bridge(0); + if (!bridge) continue; - hose->bus = bus; + + list_splice_init(&resources, &bridge->windows); + bridge->dev.parent = NULL; + bridge->sysdata = hose; + bridge->busnr = next_busno; + bridge->ops = alpha_mv.pci_ops; + bridge->swizzle_irq = alpha_mv.pci_swizzle; + bridge->map_irq = alpha_mv.pci_map_irq; + + ret = pci_scan_root_bus_bridge(bridge); + if (ret) { + pci_free_host_bridge(bridge); + continue; + } + + bus = hose->bus = bridge->bus; hose->need_domain_info = need_domain_info; next_busno = bus->busn_res.end + 1; /* Don't allow 8-bit bus number overflow inside the hose - @@ -354,7 +369,6 @@ common_init_pci(void) pcibios_claim_console_setup(); pci_assign_unassigned_resources(); - pci_fixup_irqs(alpha_mv.pci_swizzle, alpha_mv.pci_map_irq); for (hose = hose_head; hose; hose = hose->next) { bus = hose->bus; if (bus) @@ -362,7 +376,6 @@ common_init_pci(void) } } - struct pci_controller * __init alloc_pci_controller(void) { diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index 2cfaa0e5c577..8ae04a121186 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -194,22 +194,46 @@ static struct resource irongate_mem = { .name = "Irongate PCI MEM", .flags = IORESOURCE_MEM, }; +static struct resource busn_resource = { + .name = "PCI busn", + .start = 0, + .end = 255, + .flags = IORESOURCE_BUS, +}; void __init nautilus_init_pci(void) { struct pci_controller *hose = hose_head; + struct pci_host_bridge *bridge; struct pci_bus *bus; struct pci_dev *irongate; unsigned long bus_align, bus_size, pci_mem; unsigned long memtop = max_low_pfn << PAGE_SHIFT; + int ret; + + bridge = pci_alloc_host_bridge(0); + if (!bridge) + return; + + pci_add_resource(&bridge->windows, &ioport_resource); + pci_add_resource(&bridge->windows, &iomem_resource); + pci_add_resource(&bridge->windows, &busn_resource); + bridge->dev.parent = NULL; + bridge->sysdata = hose; + bridge->busnr = 0; + bridge->ops = alpha_mv.pci_ops; + bridge->swizzle_irq = alpha_mv.pci_swizzle; + bridge->map_irq = alpha_mv.pci_map_irq; /* Scan our single hose. */ - bus = pci_scan_bus(0, alpha_mv.pci_ops, hose); - if (!bus) + ret = pci_scan_root_bus_bridge(bridge); + if (ret) { + pci_free_host_bridge(bridge); return; + } - hose->bus = bus; + bus = hose->bus = bridge->bus; pcibios_claim_one_bus(bus); irongate = pci_get_bus_and_slot(0, 0); @@ -254,7 +278,6 @@ nautilus_init_pci(void) /* pci_common_swizzle() relies on bus->self being NULL for the root bus, so just clear it. */ bus->self = NULL; - pci_fixup_irqs(alpha_mv.pci_swizzle, alpha_mv.pci_map_irq); pci_bus_add_devices(bus); } diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 7db85ab00c52..a598641eed98 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -100,6 +100,7 @@ source "arch/arc/plat-tb10x/Kconfig" source "arch/arc/plat-axs10x/Kconfig" #New platform adds here source "arch/arc/plat-eznps/Kconfig" +source "arch/arc/plat-hsdk/Kconfig" endmenu @@ -418,7 +419,7 @@ endif # ISA_ARCV2 endmenu # "ARC CPU Configuration" config LINUX_LINK_BASE - hex "Linux Link Address" + hex "Kernel link address" default "0x80000000" help ARC700 divides the 32 bit phy address space into two equal halves @@ -431,6 +432,14 @@ config LINUX_LINK_BASE If you don't know what the above means, leave this setting alone. This needs to match memory start address specified in Device Tree +config LINUX_RAM_BASE + hex "RAM base address" + default LINUX_LINK_BASE + help + By default Linux is linked at base of RAM. However in some special + cases (such as HSDK), Linux can't be linked at start of DDR, hence + this option. + config HIGHMEM bool "High Memory Support" select ARCH_DISCONTIGMEM_ENABLE diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 3a61cfcc38c0..3a4b52b7e09d 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -111,6 +111,7 @@ core-y += arch/arc/plat-sim/ core-$(CONFIG_ARC_PLAT_TB10X) += arch/arc/plat-tb10x/ core-$(CONFIG_ARC_PLAT_AXS10X) += arch/arc/plat-axs10x/ core-$(CONFIG_ARC_PLAT_EZNPS) += arch/arc/plat-eznps/ +core-$(CONFIG_ARC_SOC_HSDK) += arch/arc/plat-hsdk/ ifdef CONFIG_ARC_PLAT_EZNPS KBUILD_CPPFLAGS += -I$(srctree)/arch/arc/plat-eznps/include diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi index a380ffa1a458..fdc266504ada 100644 --- a/arch/arc/boot/dts/axc001.dtsi +++ b/arch/arc/boot/dts/axc001.dtsi @@ -99,7 +99,7 @@ memory { device_type = "memory"; - /* CONFIG_KERNEL_RAM_BASE_ADDRESS needs to match low mem start */ + /* CONFIG_LINUX_RAM_BASE needs to match low mem start */ reg = <0x0 0x80000000 0x0 0x1b000000>; /* (512 - 32) MiB */ }; diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index cc9239ef8d08..4e6e9f57e790 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -24,10 +24,17 @@ ranges = <0x00000000 0x0 0xf0000000 0x10000000>; - core_clk: core_clk { + input_clk: input-clk { #clock-cells = <0>; compatible = "fixed-clock"; - clock-frequency = <90000000>; + clock-frequency = <33333333>; + }; + + core_clk: core-clk@80 { + compatible = "snps,axs10x-arc-pll-clock"; + reg = <0x80 0x10>, <0x100 0x10>; + #clock-cells = <0>; + clocks = <&input_clk>; }; core_intc: archs-intc@cpu { @@ -102,7 +109,7 @@ memory { device_type = "memory"; - /* CONFIG_KERNEL_RAM_BASE_ADDRESS needs to match low mem start */ + /* CONFIG_LINUX_RAM_BASE needs to match low mem start */ reg = <0x0 0x80000000 0x0 0x20000000 /* 512 MiB low mem */ 0x1 0xc0000000 0x0 0x40000000>; /* 1 GiB highmem */ }; diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi index 4ebb2170abec..63954a8b0100 100644 --- a/arch/arc/boot/dts/axc003_idu.dtsi +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -24,10 +24,17 @@ ranges = <0x00000000 0x0 0xf0000000 0x10000000>; - core_clk: core_clk { + input_clk: input-clk { #clock-cells = <0>; compatible = "fixed-clock"; - clock-frequency = <100000000>; + clock-frequency = <33333333>; + }; + + core_clk: core-clk@80 { + compatible = "snps,axs10x-arc-pll-clock"; + reg = <0x80 0x10>, <0x100 0x10>; + #clock-cells = <0>; + clocks = <&input_clk>; }; core_intc: archs-intc@cpu { @@ -108,7 +115,7 @@ memory { device_type = "memory"; - /* CONFIG_KERNEL_RAM_BASE_ADDRESS needs to match low mem start */ + /* CONFIG_LINUX_RAM_BASE needs to match low mem start */ reg = <0x0 0x80000000 0x0 0x20000000 /* 512 MiB low mem */ 0x1 0xc0000000 0x0 0x40000000>; /* 1 GiB highmem */ }; diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts new file mode 100644 index 000000000000..229d13adbce4 --- /dev/null +++ b/arch/arc/boot/dts/hsdk.dts @@ -0,0 +1,189 @@ +/* + * Copyright (C) 2017 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Device Tree for ARC HS Development Kit + */ +/dts-v1/; + +#include <dt-bindings/net/ti-dp83867.h> + +/ { + model = "snps,hsdk"; + compatible = "snps,hsdk"; + + #address-cells = <1>; + #size-cells = <1>; + + chosen { + bootargs = "earlycon=uart8250,mmio32,0xf0005000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1"; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "snps,archs38"; + reg = <0>; + clocks = <&core_clk>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "snps,archs38"; + reg = <1>; + clocks = <&core_clk>; + }; + + cpu@2 { + device_type = "cpu"; + compatible = "snps,archs38"; + reg = <2>; + clocks = <&core_clk>; + }; + + cpu@3 { + device_type = "cpu"; + compatible = "snps,archs38"; + reg = <3>; + clocks = <&core_clk>; + }; + }; + + core_clk: core-clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <500000000>; + }; + + cpu_intc: cpu-interrupt-controller { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + idu_intc: idu-interrupt-controller { + compatible = "snps,archs-idu-intc"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&cpu_intc>; + }; + + arcpct: pct { + compatible = "snps,archs-pct"; + }; + + /* TIMER0 with interrupt for clockevent */ + timer { + compatible = "snps,arc-timer"; + interrupts = <16>; + interrupt-parent = <&cpu_intc>; + clocks = <&core_clk>; + }; + + /* 64-bit Global Free Running Counter */ + gfrc { + compatible = "snps,archs-timer-gfrc"; + clocks = <&core_clk>; + }; + + soc { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + interrupt-parent = <&idu_intc>; + + ranges = <0x00000000 0xf0000000 0x10000000>; + + serial: serial@5000 { + compatible = "snps,dw-apb-uart"; + reg = <0x5000 0x100>; + clock-frequency = <33330000>; + interrupts = <6>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + + gmacclk: gmacclk { + compatible = "fixed-clock"; + clock-frequency = <400000000>; + #clock-cells = <0>; + }; + + mmcclk_ciu: mmcclk-ciu { + compatible = "fixed-clock"; + clock-frequency = <100000000>; + #clock-cells = <0>; + }; + + mmcclk_biu: mmcclk-biu { + compatible = "fixed-clock"; + clock-frequency = <400000000>; + #clock-cells = <0>; + }; + + ethernet@8000 { + #interrupt-cells = <1>; + compatible = "snps,dwmac"; + reg = <0x8000 0x2000>; + interrupts = <10>; + interrupt-names = "macirq"; + phy-mode = "rgmii"; + snps,pbl = <32>; + clocks = <&gmacclk>; + clock-names = "stmmaceth"; + phy-handle = <&phy0>; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + compatible = "snps,dwmac-mdio"; + phy0: ethernet-phy@0 { + reg = <0>; + ti,rx-internal-delay = <DP83867_RGMIIDCTL_2_00_NS>; + ti,tx-internal-delay = <DP83867_RGMIIDCTL_2_00_NS>; + ti,fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_4_B_NIB>; + }; + }; + }; + + ohci@60000 { + compatible = "snps,hsdk-v1.0-ohci", "generic-ohci"; + reg = <0x60000 0x100>; + interrupts = <15>; + }; + + ehci@40000 { + compatible = "snps,hsdk-v1.0-ehci", "generic-ehci"; + reg = <0x40000 0x100>; + interrupts = <15>; + }; + + mmc@a000 { + compatible = "altr,socfpga-dw-mshc"; + reg = <0xa000 0x400>; + num-slots = <1>; + fifo-depth = <16>; + card-detect-delay = <200>; + clocks = <&mmcclk_biu>, <&mmcclk_ciu>; + clock-names = "biu", "ciu"; + interrupts = <12>; + bus-width = <4>; + }; + }; + + memory@80000000 { + #address-cells = <1>; + #size-cells = <1>; + device_type = "memory"; + reg = <0x80000000 0x40000000>; /* 1 GiB */ + }; +}; diff --git a/arch/arc/boot/dts/nsim_hs.dts b/arch/arc/boot/dts/nsim_hs.dts index 3772c40c245e..8d787b251f73 100644 --- a/arch/arc/boot/dts/nsim_hs.dts +++ b/arch/arc/boot/dts/nsim_hs.dts @@ -18,7 +18,7 @@ memory { device_type = "memory"; - /* CONFIG_LINUX_LINK_BASE needs to match low mem start */ + /* CONFIG_LINUX_RAM_BASE needs to match low mem start */ reg = <0x0 0x80000000 0x0 0x20000000 /* 512 MB low mem */ 0x1 0x00000000 0x0 0x40000000>; /* 1 GB highmem */ }; diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig new file mode 100644 index 000000000000..9a3fcf446388 --- /dev/null +++ b/arch/arc/configs/hsdk_defconfig @@ -0,0 +1,80 @@ +CONFIG_DEFAULT_HOSTNAME="ARCLinux" +CONFIG_SYSVIPC=y +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/" +CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_SLAB=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_ARC_SOC_HSDK=y +CONFIG_ISA_ARCV2=y +CONFIG_SMP=y +CONFIG_LINUX_LINK_BASE=0x90000000 +CONFIG_LINUX_RAM_BASE=0x80000000 +CONFIG_ARC_BUILTIN_DTB_NAME="hsdk" +CONFIG_PREEMPT=y +# CONFIG_COMPACTION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_DEVTMPFS=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_NETDEVICES=y +CONFIG_STMMAC_ETH=y +CONFIG_MICREL_PHY=y +CONFIG_INPUT_EVDEV=y +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +CONFIG_FB=y +CONFIG_FB_UDL=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_USB=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_HCD_PLATFORM=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_STORAGE=y +CONFIG_MMC=y +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_PLTFM=y +CONFIG_MMC_DW=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT3_FS=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +CONFIG_NFS_FS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_STRIP_ASM_SYMS=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 +# CONFIG_SCHED_DEBUG is not set +# CONFIG_DEBUG_PREEMPT is not set +# CONFIG_FTRACE is not set +CONFIG_CRYPTO_ECHAINIV=y diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index 02fd1cece6ef..8486f328cc5d 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -47,7 +47,8 @@ : "r"(data), "r"(ptr)); \ }) -#define ARCH_DMA_MINALIGN L1_CACHE_BYTES +/* Largest line length for either L1 or L2 is 128 bytes */ +#define ARCH_DMA_MINALIGN 128 extern void arc_cache_init(void); extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len); @@ -95,6 +96,8 @@ extern unsigned long perip_base, perip_end; #define ARC_REG_SLC_CTRL 0x903 #define ARC_REG_SLC_FLUSH 0x904 #define ARC_REG_SLC_INVALIDATE 0x905 +#define ARC_AUX_SLC_IVDL 0x910 +#define ARC_AUX_SLC_FLDL 0x912 #define ARC_REG_SLC_RGN_START 0x914 #define ARC_REG_SLC_RGN_START1 0x915 #define ARC_REG_SLC_RGN_END 0x916 diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h index 14c310f2e0b1..ec36d5b6d435 100644 --- a/arch/arc/include/asm/entry-compact.h +++ b/arch/arc/include/asm/entry-compact.h @@ -192,6 +192,12 @@ PUSHAX lp_start PUSHAX erbta +#ifdef CONFIG_ARC_PLAT_EZNPS + .word CTOP_INST_SCHD_RW + PUSHAX CTOP_AUX_GPA1 + PUSHAX CTOP_AUX_EFLAGS +#endif + lr r9, [ecr] st r9, [sp, PT_event] /* EV_Trap expects r9 to have ECR */ .endm @@ -208,6 +214,12 @@ * by hardware and that is not good. *-------------------------------------------------------------*/ .macro EXCEPTION_EPILOGUE +#ifdef CONFIG_ARC_PLAT_EZNPS + .word CTOP_INST_SCHD_RW + POPAX CTOP_AUX_EFLAGS + POPAX CTOP_AUX_GPA1 +#endif + POPAX erbta POPAX lp_start POPAX lp_end @@ -265,6 +277,12 @@ PUSHAX lp_end PUSHAX lp_start PUSHAX bta_l\LVL\() + +#ifdef CONFIG_ARC_PLAT_EZNPS + .word CTOP_INST_SCHD_RW + PUSHAX CTOP_AUX_GPA1 + PUSHAX CTOP_AUX_EFLAGS +#endif .endm /*-------------------------------------------------------------- @@ -277,6 +295,12 @@ * by hardware and that is not good. *-------------------------------------------------------------*/ .macro INTERRUPT_EPILOGUE LVL +#ifdef CONFIG_ARC_PLAT_EZNPS + .word CTOP_INST_SCHD_RW + POPAX CTOP_AUX_EFLAGS + POPAX CTOP_AUX_GPA1 +#endif + POPAX bta_l\LVL\() POPAX lp_start POPAX lp_end diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h index a64c447b0337..8a4f77ea3238 100644 --- a/arch/arc/include/asm/irqflags-arcv2.h +++ b/arch/arc/include/asm/irqflags-arcv2.h @@ -47,9 +47,6 @@ #define ISA_INIT_STATUS_BITS (STATUS_IE_MASK | STATUS_AD_MASK | \ (ARCV2_IRQ_DEF_PRIO << 1)) -/* SLEEP needs default irq priority (<=) which can interrupt the doze */ -#define ISA_SLEEP_ARG (0x10 | ARCV2_IRQ_DEF_PRIO) - #ifndef __ASSEMBLY__ /* diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h index 4c6eed80cd8b..fcb80171fc34 100644 --- a/arch/arc/include/asm/irqflags-compact.h +++ b/arch/arc/include/asm/irqflags-compact.h @@ -43,8 +43,6 @@ #define ISA_INIT_STATUS_BITS STATUS_IE_MASK -#define ISA_SLEEP_ARG 0x3 - #ifndef __ASSEMBLY__ /****************************************************************** diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 296c3426a6ad..109baa06831c 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -85,7 +85,7 @@ typedef pte_t * pgtable_t; */ #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) -#define ARCH_PFN_OFFSET virt_to_pfn(CONFIG_LINUX_LINK_BASE) +#define ARCH_PFN_OFFSET virt_to_pfn(CONFIG_LINUX_RAM_BASE) #ifdef CONFIG_FLATMEM #define pfn_valid(pfn) (((pfn) - ARCH_PFN_OFFSET) < max_mapnr) diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index 4104a0839214..d400a2161935 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -27,6 +27,13 @@ struct arc_fpu { }; #endif +#ifdef CONFIG_ARC_PLAT_EZNPS +struct eznps_dp { + unsigned int eflags; + unsigned int gpa1; +}; +#endif + /* Arch specific stuff which needs to be saved per task. * However these items are not so important so as to earn a place in * struct thread_info @@ -38,6 +45,9 @@ struct thread_struct { #ifdef CONFIG_ARC_FPU_SAVE_RESTORE struct arc_fpu fpu; #endif +#ifdef CONFIG_ARC_PLAT_EZNPS + struct eznps_dp dp; +#endif }; #define INIT_THREAD { \ diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index 5297faa8a378..5a8cb22724a1 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -19,6 +19,11 @@ #ifdef CONFIG_ISA_ARCOMPACT struct pt_regs { +#ifdef CONFIG_ARC_PLAT_EZNPS + unsigned long eflags; /* Extended FLAGS */ + unsigned long gpa1; /* General Purpose Aux */ +#endif + /* Real registers */ unsigned long bta; /* bta_l1, bta_l2, erbta */ diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index a325e6a36523..47efc8451b70 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -247,9 +247,15 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) __asm__ __volatile__( "1: ex %0, [%1] \n" +#ifdef CONFIG_EZNPS_MTM_EXT + " .word %3 \n" +#endif " breq %0, %2, 1b \n" : "+&r" (val) : "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__) +#ifdef CONFIG_EZNPS_MTM_EXT + , "i"(CTOP_INST_SCHD_RW) +#endif : "memory"); /* @@ -291,6 +297,12 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) */ smp_mb(); + /* + * EX is not really required here, a simple STore of 0 suffices. + * However this causes tasklist livelocks in SystemC based SMP virtual + * platforms where the systemc core scheduler uses EX as a cue for + * moving to next core. Do a git log of this file for details + */ __asm__ __volatile__( " ex %0, [%1] \n" : "+r" (val) diff --git a/arch/arc/include/asm/switch_to.h b/arch/arc/include/asm/switch_to.h index 1b171ab5fec0..f7d07feeea61 100644 --- a/arch/arc/include/asm/switch_to.h +++ b/arch/arc/include/asm/switch_to.h @@ -26,10 +26,19 @@ extern void fpu_save_restore(struct task_struct *p, struct task_struct *n); #endif /* !CONFIG_ARC_FPU_SAVE_RESTORE */ +#ifdef CONFIG_ARC_PLAT_EZNPS +extern void dp_save_restore(struct task_struct *p, struct task_struct *n); +#define ARC_EZNPS_DP_PREV(p, n) dp_save_restore(p, n) +#else +#define ARC_EZNPS_DP_PREV(p, n) + +#endif /* !CONFIG_ARC_PLAT_EZNPS */ + struct task_struct *__switch_to(struct task_struct *p, struct task_struct *n); #define switch_to(prev, next, last) \ do { \ + ARC_EZNPS_DP_PREV(prev, next); \ ARC_FPU_PREV(prev, next); \ last = __switch_to(prev, next);\ ARC_FPU_NEXT(next); \ diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile index 8942c5c3b4c5..2dc5f4296d44 100644 --- a/arch/arc/kernel/Makefile +++ b/arch/arc/kernel/Makefile @@ -12,7 +12,6 @@ obj-y := arcksyms.o setup.o irq.o reset.o ptrace.o process.o devtree.o obj-y += signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o obj-$(CONFIG_ISA_ARCOMPACT) += entry-compact.o intc-compact.o obj-$(CONFIG_ISA_ARCV2) += entry-arcv2.o intc-arcv2.o -obj-$(CONFIG_PCI) += pcibios.o obj-$(CONFIG_MODULES) += arcksyms.o module.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c index 3b67f538f142..521ef3521a1c 100644 --- a/arch/arc/kernel/devtree.c +++ b/arch/arc/kernel/devtree.c @@ -29,8 +29,9 @@ static void __init arc_set_early_base_baud(unsigned long dt_root) { if (of_flat_dt_is_compatible(dt_root, "abilis,arc-tb10x")) arc_base_baud = 166666666; /* Fixed 166.6MHz clk (TB10x) */ - else if (of_flat_dt_is_compatible(dt_root, "snps,arc-sdp")) - arc_base_baud = 33333333; /* Fixed 33MHz clk (AXS10x) */ + else if (of_flat_dt_is_compatible(dt_root, "snps,arc-sdp") || + of_flat_dt_is_compatible(dt_root, "snps,hsdk")) + arc_base_baud = 33333333; /* Fixed 33MHz clk (AXS10x & HSDK) */ else if (of_flat_dt_is_compatible(dt_root, "ezchip,arc-nps")) arc_base_baud = 800000000; /* Fixed 800MHz clk (NPS) */ else diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S index 9211707634dc..f285dbb28066 100644 --- a/arch/arc/kernel/entry-compact.S +++ b/arch/arc/kernel/entry-compact.S @@ -25,12 +25,12 @@ * * vineetg: Nov 2009 (Everything needed for TIF_RESTORE_SIGMASK) * -do_signal()invoked upon TIF_RESTORE_SIGMASK as well - * -Wrappers for sys_{,rt_}sigsuspend() nolonger needed as they don't + * -Wrappers for sys_{,rt_}sigsuspend() no longer needed as they don't * need ptregs anymore * * Vineetg: Oct 2009 * -In a rare scenario, Process gets a Priv-V exception and gets scheduled - * out. Since we don't do FAKE RTIE for Priv-V, CPU excpetion state remains + * out. Since we don't do FAKE RTIE for Priv-V, CPU exception state remains * active (AE bit enabled). This causes a double fault for a subseq valid * exception. Thus FAKE RTIE needed in low level Priv-Violation handler. * Instr Error could also cause similar scenario, so same there as well. @@ -59,7 +59,7 @@ */ #include <linux/errno.h> -#include <linux/linkage.h> /* {EXTRY,EXIT} */ +#include <linux/linkage.h> /* {ENTRY,EXIT} */ #include <asm/entry.h> #include <asm/irqflags.h> @@ -80,8 +80,8 @@ .align 4 /* Each entry in the vector table must occupy 2 words. Since it is a jump - * across sections (.vector to .text) we are gauranteed that 'j somewhere' - * will use the 'j limm' form of the intrsuction as long as somewhere is in + * across sections (.vector to .text) we are guaranteed that 'j somewhere' + * will use the 'j limm' form of the instruction as long as somewhere is in * a section other than .vector. */ @@ -105,13 +105,13 @@ VECTOR handle_interrupt_level1 ; Other devices ; ******************** Exceptions ********************** VECTOR EV_MachineCheck ; 0x100, Fatal Machine check (0x20) -VECTOR EV_TLBMissI ; 0x108, Intruction TLB miss (0x21) +VECTOR EV_TLBMissI ; 0x108, Instruction TLB miss (0x21) VECTOR EV_TLBMissD ; 0x110, Data TLB miss (0x22) VECTOR EV_TLBProtV ; 0x118, Protection Violation (0x23) ; or Misaligned Access VECTOR EV_PrivilegeV ; 0x120, Privilege Violation (0x24) VECTOR EV_Trap ; 0x128, Trap exception (0x25) -VECTOR EV_Extension ; 0x130, Extn Intruction Excp (0x26) +VECTOR EV_Extension ; 0x130, Extn Instruction Excp (0x26) .rept 24 VECTOR reserved ; Reserved Exceptions @@ -199,7 +199,7 @@ END(handle_interrupt_level2) ; --------------------------------------------- ; User Mode Memory Bus Error Interrupt Handler -; (Kernel mode memory errors handled via seperate exception vectors) +; (Kernel mode memory errors handled via separate exception vectors) ; --------------------------------------------- ENTRY(mem_service) @@ -273,7 +273,7 @@ ENTRY(EV_TLBProtV) ;------ (5) Type of Protection Violation? ---------- ; ; ProtV Hardware Exception is triggered for Access Faults of 2 types - ; -Access Violaton : 00_23_(00|01|02|03)_00 + ; -Access Violation : 00_23_(00|01|02|03)_00 ; x r w r+w ; -Unaligned Access : 00_23_04_00 ; @@ -327,7 +327,7 @@ END(call_do_page_fault) .Lrestore_regs: - # Interrpts are actually disabled from this point on, but will get + # Interrupts are actually disabled from this point on, but will get # reenabled after we return from interrupt/exception. # But irq tracer needs to be told now... TRACE_ASM_IRQ_ENABLE @@ -335,7 +335,7 @@ END(call_do_page_fault) lr r10, [status32] ; Restore REG File. In case multiple Events outstanding, - ; use the same priorty as rtie: EXCPN, L2 IRQ, L1 IRQ, None + ; use the same priority as rtie: EXCPN, L2 IRQ, L1 IRQ, None ; Note that we use realtime STATUS32 (not pt_regs->status32) to ; decide that. diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 1eea99beecc3..85d9ea4a0acc 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -92,6 +92,12 @@ ENTRY(EV_MachineCheck) lr r0, [efa] mov r1, sp + ; hardware auto-disables MMU, re-enable it to allow kernel vaddr + ; access for say stack unwinding of modules for crash dumps + lr r3, [ARC_REG_PID] + or r3, r3, MMU_ENABLE + sr r3, [ARC_REG_PID] + lsr r3, r2, 8 bmsk r3, r3, 7 brne r3, ECR_C_MCHK_DUP_TLB, 1f diff --git a/arch/arc/kernel/pcibios.c b/arch/arc/kernel/pcibios.c deleted file mode 100644 index 72e1d73d0bd6..000000000000 --- a/arch/arc/kernel/pcibios.c +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (C) 2014-2015 Synopsys, Inc. (www.synopsys.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/pci.h> - -/* - * We don't have to worry about legacy ISA devices, so nothing to do here - */ -resource_size_t pcibios_align_resource(void *data, const struct resource *res, - resource_size_t size, resource_size_t align) -{ - return res->start; -} - -void pcibios_fixup_bus(struct pci_bus *bus) -{ -} diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 2a018de6d6cd..5ac3b547453f 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -79,15 +79,40 @@ done: return uval; } +#ifdef CONFIG_ISA_ARCV2 + void arch_cpu_idle(void) { - /* sleep, but enable all interrupts before committing */ + /* Re-enable interrupts <= default irq priority before commiting SLEEP */ + const unsigned int arg = 0x10 | ARCV2_IRQ_DEF_PRIO; + __asm__ __volatile__( "sleep %0 \n" : - :"I"(ISA_SLEEP_ARG)); /* can't be "r" has to be embedded const */ + :"I"(arg)); /* can't be "r" has to be embedded const */ +} + +#elif defined(CONFIG_EZNPS_MTM_EXT) /* ARC700 variant in NPS */ + +void arch_cpu_idle(void) +{ + /* only the calling HW thread needs to sleep */ + __asm__ __volatile__( + ".word %0 \n" + : + :"i"(CTOP_INST_HWSCHD_WFT_IE12)); +} + +#else /* ARC700 */ + +void arch_cpu_idle(void) +{ + /* sleep, but enable both set E1/E2 (levels of interrutps) before committing */ + __asm__ __volatile__("sleep 0x3 \n"); } +#endif + asmlinkage void ret_from_fork(void); /* @@ -209,6 +234,10 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp) */ regs->status32 = STATUS_U_MASK | STATUS_L_MASK | ISA_INIT_STATUS_BITS; +#ifdef CONFIG_EZNPS_MTM_EXT + regs->eflags = 0; +#endif + /* bogus seed values for debugging */ regs->lp_start = 0x10; regs->lp_end = 0x80; diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 666613fde91d..c4ffb441716c 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -385,13 +385,13 @@ void setup_processor(void) read_arc_build_cfg_regs(); arc_init_IRQ(); - printk(arc_cpu_mumbojumbo(cpu_id, str, sizeof(str))); + pr_info("%s", arc_cpu_mumbojumbo(cpu_id, str, sizeof(str))); arc_mmu_init(); arc_cache_init(); - printk(arc_extn_mumbojumbo(cpu_id, str, sizeof(str))); - printk(arc_platform_smp_cpuinfo()); + pr_info("%s", arc_extn_mumbojumbo(cpu_id, str, sizeof(str))); + pr_info("%s", arc_platform_smp_cpuinfo()); arc_chk_core_config(); } diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c index ff83e78d0cfb..bcd7c9fc5d0f 100644 --- a/arch/arc/kernel/traps.c +++ b/arch/arc/kernel/traps.c @@ -80,7 +80,7 @@ int name(unsigned long address, struct pt_regs *regs) \ DO_ERROR_INFO(SIGILL, "Priv Op/Disabled Extn", do_privilege_fault, ILL_PRVOPC) DO_ERROR_INFO(SIGILL, "Invalid Extn Insn", do_extension_fault, ILL_ILLOPC) DO_ERROR_INFO(SIGILL, "Illegal Insn (or Seq)", insterror_is_error, ILL_ILLOPC) -DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", do_memory_error, BUS_ADRERR) +DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", __weak do_memory_error, BUS_ADRERR) DO_ERROR_INFO(SIGTRAP, "Breakpoint Set", trap_is_brkpt, TRAP_BRKPT) DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN) @@ -103,7 +103,7 @@ int do_misaligned_access(unsigned long address, struct pt_regs *regs, */ void do_machine_check_fault(unsigned long address, struct pt_regs *regs) { - die("Machine Check Exception", regs, address); + die("Unhandled Machine Check Exception", regs, address); } diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index f9caf79186d4..7e94476f3994 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -140,7 +140,7 @@ static void show_ecr_verbose(struct pt_regs *regs) } else if (vec == ECR_V_ITLB_MISS) { pr_cont("Insn could not be fetched\n"); } else if (vec == ECR_V_MACH_CHK) { - pr_cont("%s\n", (cause_code == 0x0) ? + pr_cont("Machine Check (%s)\n", (cause_code == 0x0) ? "Double Fault" : "Other Fatal Err"); } else if (vec == ECR_V_PROTV) { @@ -233,6 +233,9 @@ void show_kernel_fault_diag(const char *str, struct pt_regs *regs, { current->thread.fault_address = address; + /* Show fault description */ + pr_info("\n%s\n", str); + /* Caller and Callee regs */ show_regs(regs); diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 7db283b46ebd..eee924dfffa6 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -652,7 +652,7 @@ static void __ic_line_inv_vaddr(phys_addr_t paddr, unsigned long vaddr, #endif /* CONFIG_ARC_HAS_ICACHE */ -noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op) +noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op) { #ifdef CONFIG_ISA_ARCV2 /* @@ -715,6 +715,58 @@ noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op) #endif } +noinline void slc_op_line(phys_addr_t paddr, unsigned long sz, const int op) +{ +#ifdef CONFIG_ISA_ARCV2 + /* + * SLC is shared between all cores and concurrent aux operations from + * multiple cores need to be serialized using a spinlock + * A concurrent operation can be silently ignored and/or the old/new + * operation can remain incomplete forever (lockup in SLC_CTRL_BUSY loop + * below) + */ + static DEFINE_SPINLOCK(lock); + + const unsigned long SLC_LINE_MASK = ~(l2_line_sz - 1); + unsigned int ctrl, cmd; + unsigned long flags; + int num_lines; + + spin_lock_irqsave(&lock, flags); + + ctrl = read_aux_reg(ARC_REG_SLC_CTRL); + + /* Don't rely on default value of IM bit */ + if (!(op & OP_FLUSH)) /* i.e. OP_INV */ + ctrl &= ~SLC_CTRL_IM; /* clear IM: Disable flush before Inv */ + else + ctrl |= SLC_CTRL_IM; + + write_aux_reg(ARC_REG_SLC_CTRL, ctrl); + + cmd = op & OP_INV ? ARC_AUX_SLC_IVDL : ARC_AUX_SLC_FLDL; + + sz += paddr & ~SLC_LINE_MASK; + paddr &= SLC_LINE_MASK; + + num_lines = DIV_ROUND_UP(sz, l2_line_sz); + + while (num_lines-- > 0) { + write_aux_reg(cmd, paddr); + paddr += l2_line_sz; + } + + /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */ + read_aux_reg(ARC_REG_SLC_CTRL); + + while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY); + + spin_unlock_irqrestore(&lock, flags); +#endif +} + +#define slc_op(paddr, sz, op) slc_op_rgn(paddr, sz, op) + noinline static void slc_entire_op(const int op) { unsigned int ctrl, r = ARC_REG_SLC_CTRL; @@ -1095,7 +1147,7 @@ SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags) */ noinline void __init arc_ioc_setup(void) { - unsigned int ap_sz; + unsigned int ioc_base, mem_sz; /* Flush + invalidate + disable L1 dcache */ __dc_disable(); @@ -1104,18 +1156,29 @@ noinline void __init arc_ioc_setup(void) if (read_aux_reg(ARC_REG_SLC_BCR)) slc_entire_op(OP_FLUSH_N_INV); - /* IOC Aperture start: TDB: handle non default CONFIG_LINUX_LINK_BASE */ - write_aux_reg(ARC_REG_IO_COH_AP0_BASE, 0x80000); - /* - * IOC Aperture size: - * decoded as 2 ^ (SIZE + 2) KB: so setting 0x11 implies 512M + * currently IOC Aperture covers entire DDR * TBD: fix for PGU + 1GB of low mem * TBD: fix for PAE */ - ap_sz = order_base_2(arc_get_mem_sz()/1024) - 2; - write_aux_reg(ARC_REG_IO_COH_AP0_SIZE, ap_sz); + mem_sz = arc_get_mem_sz(); + + if (!is_power_of_2(mem_sz) || mem_sz < 4096) + panic("IOC Aperture size must be power of 2 larger than 4KB"); + + /* + * IOC Aperture size decoded as 2 ^ (SIZE + 2) KB, + * so setting 0x11 implies 512MB, 0x12 implies 1GB... + */ + write_aux_reg(ARC_REG_IO_COH_AP0_SIZE, order_base_2(mem_sz >> 10) - 2); + + /* for now assume kernel base is start of IOC aperture */ + ioc_base = CONFIG_LINUX_RAM_BASE; + + if (ioc_base % mem_sz != 0) + panic("IOC Aperture start must be aligned to the size of the aperture"); + write_aux_reg(ARC_REG_IO_COH_AP0_BASE, ioc_base >> 12); write_aux_reg(ARC_REG_IO_COH_PARTIAL, 1); write_aux_reg(ARC_REG_IO_COH_ENABLE, 1); @@ -1207,7 +1270,7 @@ void __ref arc_cache_init(void) unsigned int __maybe_unused cpu = smp_processor_id(); char str[256]; - printk(arc_cache_mumbojumbo(0, str, sizeof(str))); + pr_info("%s", arc_cache_mumbojumbo(0, str, sizeof(str))); if (!cpu) arc_cache_init_master(); diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 162c97528872..a0b7bd6d030d 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -207,7 +207,7 @@ no_context: /* Are we prepared to handle this kernel fault? * * (The kernel has valid exception-points in the source - * when it acesses user-memory. When it fails in one + * when it accesses user-memory. When it fails in one * of those points, we find it in a table and do a jump * to some fixup code that loads an appropriate error * code) diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 8c9415ed6280..ba145065c579 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -26,7 +26,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE); char empty_zero_page[PAGE_SIZE] __aligned(PAGE_SIZE); EXPORT_SYMBOL(empty_zero_page); -static const unsigned long low_mem_start = CONFIG_LINUX_LINK_BASE; +static const unsigned long low_mem_start = CONFIG_LINUX_RAM_BASE; static unsigned long low_mem_sz; #ifdef CONFIG_HIGHMEM @@ -63,7 +63,7 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) if (!low_mem_sz) { if (base != low_mem_start) - panic("CONFIG_LINUX_LINK_BASE != DT memory { }"); + panic("CONFIG_LINUX_RAM_BASE != DT memory { }"); low_mem_sz = size; in_use = 1; @@ -161,7 +161,7 @@ void __init setup_arch_memory(void) * We can't use the helper free_area_init(zones[]) because it uses * PAGE_OFFSET to compute the @min_low_pfn which would be wrong * when our kernel doesn't start at PAGE_OFFSET, i.e. - * PAGE_OFFSET != CONFIG_LINUX_LINK_BASE + * PAGE_OFFSET != CONFIG_LINUX_RAM_BASE */ free_area_init_node(0, /* node-id */ zones_size, /* num pages per zone */ diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index b181f3ee38aa..8ceefbf72fb0 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -821,7 +821,7 @@ void arc_mmu_init(void) char str[256]; struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; - printk(arc_mmu_mumbojumbo(0, str, sizeof(str))); + pr_info("%s", arc_mmu_mumbojumbo(0, str, sizeof(str))); /* * Can't be done in processor.h due to header include depenedencies @@ -908,9 +908,6 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, local_irq_save(flags); - /* re-enable the MMU */ - write_aux_reg(ARC_REG_PID, MMU_ENABLE | read_aux_reg(ARC_REG_PID)); - /* loop thru all sets of TLB */ for (set = 0; set < mmu->sets; set++) { diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index b30e4e36bb00..0e1e47a67c73 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -274,6 +274,13 @@ ex_saved_reg1: .macro COMMIT_ENTRY_TO_MMU #if (CONFIG_ARC_MMU_VER < 4) +#ifdef CONFIG_EZNPS_MTM_EXT + /* verify if entry for this vaddr+ASID already exists */ + sr TLBProbe, [ARC_REG_TLBCOMMAND] + lr r0, [ARC_REG_TLBINDEX] + bbit0 r0, 31, 88f +#endif + /* Get free TLB slot: Set = computed from vaddr, way = random */ sr TLBGetIndex, [ARC_REG_TLBCOMMAND] @@ -287,6 +294,8 @@ ex_saved_reg1: #else sr TLBInsertEntry, [ARC_REG_TLBCOMMAND] #endif + +88: .endm diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c index 38ff349d7f2a..f1ac6790da5f 100644 --- a/arch/arc/plat-axs10x/axs10x.c +++ b/arch/arc/plat-axs10x/axs10x.c @@ -80,22 +80,6 @@ static void __init axs10x_enable_gpio_intc_wire(void) iowrite32(1 << MB_TO_GPIO_IRQ, (void __iomem *) GPIO_INTEN); } -static inline void __init -write_cgu_reg(uint32_t value, void __iomem *reg, void __iomem *lock_reg) -{ - unsigned int loops = 128 * 1024, ctr; - - iowrite32(value, reg); - - ctr = loops; - while (((ioread32(lock_reg) & 1) == 1) && ctr--) /* wait for unlock */ - cpu_relax(); - - ctr = loops; - while (((ioread32(lock_reg) & 1) == 0) && ctr--) /* wait for re-lock */ - cpu_relax(); -} - static void __init axs10x_print_board_ver(unsigned int creg, const char *str) { union ver { @@ -314,7 +298,6 @@ static void __init axs101_early_init(void) #ifdef CONFIG_AXS103 -#define AXC003_CGU 0xF0000000 #define AXC003_CREG 0xF0001000 #define AXC003_MST_AXI_TUNNEL 0 #define AXC003_MST_HS38 1 @@ -324,131 +307,38 @@ static void __init axs101_early_init(void) #define CREG_CPU_TUN_IO_CTRL (AXC003_CREG + 0x494) -union pll_reg { - struct { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad:17, noupd:1, bypass:1, edge:1, high:6, low:6; -#else - unsigned int low:6, high:6, edge:1, bypass:1, noupd:1, pad:17; -#endif - }; - unsigned int val; -}; - -static unsigned int __init axs103_get_freq(void) -{ - union pll_reg idiv, fbdiv, odiv; - unsigned int f = 33333333; - - idiv.val = ioread32((void __iomem *)AXC003_CGU + 0x80 + 0); - fbdiv.val = ioread32((void __iomem *)AXC003_CGU + 0x80 + 4); - odiv.val = ioread32((void __iomem *)AXC003_CGU + 0x80 + 8); - - if (idiv.bypass != 1) - f = f / (idiv.low + idiv.high); - - if (fbdiv.bypass != 1) - f = f * (fbdiv.low + fbdiv.high); - - if (odiv.bypass != 1) - f = f / (odiv.low + odiv.high); - - f = (f + 500000) / 1000000; /* Rounding */ - return f; -} - -static inline unsigned int __init encode_div(unsigned int id, int upd) -{ - union pll_reg div; - - div.val = 0; - - div.noupd = !upd; - div.bypass = id == 1 ? 1 : 0; - div.edge = (id%2 == 0) ? 0 : 1; /* 0 = rising */ - div.low = (id%2 == 0) ? id >> 1 : (id >> 1)+1; - div.high = id >> 1; - - return div.val; -} - -noinline static void __init -axs103_set_freq(unsigned int id, unsigned int fd, unsigned int od) -{ - write_cgu_reg(encode_div(id, 0), - (void __iomem *)AXC003_CGU + 0x80 + 0, - (void __iomem *)AXC003_CGU + 0x110); - - write_cgu_reg(encode_div(fd, 0), - (void __iomem *)AXC003_CGU + 0x80 + 4, - (void __iomem *)AXC003_CGU + 0x110); - - write_cgu_reg(encode_div(od, 1), - (void __iomem *)AXC003_CGU + 0x80 + 8, - (void __iomem *)AXC003_CGU + 0x110); -} - static void __init axs103_early_init(void) { - int offset = fdt_path_offset(initial_boot_params, "/cpu_card/core_clk"); - const struct fdt_property *prop = fdt_get_property(initial_boot_params, - offset, - "clock-frequency", - NULL); - u32 freq = be32_to_cpu(*(u32*)(prop->data)) / 1000000, orig = freq; - +#ifdef CONFIG_ARC_MCIP /* * AXS103 configurations for SMP/QUAD configurations share device tree - * which defaults to 90 MHz. However recent failures of Quad config + * which defaults to 100 MHz. However recent failures of Quad config * revealed P&R timing violations so clamp it down to safe 50 MHz * Instead of duplicating defconfig/DT for SMP/QUAD, add a small hack - * - * This hack is really hacky as of now. Fix it properly by getting the - * number of cores as return value of platform's early SMP callback + * of fudging the freq in DT */ -#ifdef CONFIG_ARC_MCIP unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F; - if (num_cores > 2) - freq = 50; -#endif - - switch (freq) { - case 33: - axs103_set_freq(1, 1, 1); - break; - case 50: - axs103_set_freq(1, 30, 20); - break; - case 75: - axs103_set_freq(2, 45, 10); - break; - case 90: - axs103_set_freq(2, 54, 10); - break; - case 100: - axs103_set_freq(1, 30, 10); - break; - case 125: - axs103_set_freq(2, 45, 6); - break; - default: + if (num_cores > 2) { + u32 freq = 50, orig; /* - * In this case, core_frequency derived from - * DT "clock-frequency" might not match with board value. - * Hence update it to match the board value. + * TODO: use cpu node "cpu-freq" param instead of platform-specific + * "/cpu_card/core_clk" as it works only if we use fixed-clock for cpu. */ - freq = axs103_get_freq(); - break; - } - - pr_info("Freq is %dMHz\n", freq); - - /* Patching .dtb in-place with new core clock value */ - if (freq != orig ) { - freq = cpu_to_be32(freq * 1000000); - fdt_setprop_inplace(initial_boot_params, offset, - "clock-frequency", &freq, sizeof(freq)); + int off = fdt_path_offset(initial_boot_params, "/cpu_card/core_clk"); + const struct fdt_property *prop; + + prop = fdt_get_property(initial_boot_params, off, + "clock-frequency", NULL); + orig = be32_to_cpu(*(u32*)(prop->data)) / 1000000; + + /* Patching .dtb in-place with new core clock value */ + if (freq != orig ) { + freq = cpu_to_be32(freq * 1000000); + fdt_setprop_inplace(initial_boot_params, off, + "clock-frequency", &freq, sizeof(freq)); + } } +#endif /* Memory maps already config in pre-bootloader */ diff --git a/arch/arc/plat-eznps/Kconfig b/arch/arc/plat-eznps/Kconfig index 1595a38e50cd..e151e2067886 100644 --- a/arch/arc/plat-eznps/Kconfig +++ b/arch/arc/plat-eznps/Kconfig @@ -12,8 +12,8 @@ menuconfig ARC_PLAT_EZNPS help Support for EZchip development platforms, based on ARC700 cores. - We handle few flavours: - - Hardware Emulator AKA HE which is FPGA based chasis + We handle few flavors: + - Hardware Emulator AKA HE which is FPGA based chassis - Simulator based on MetaWare nSIM - NPS400 chip based on ASIC @@ -32,3 +32,25 @@ config EZNPS_MTM_EXT any of them seem like CPU from Linux point of view. All threads within same core share the execution unit of the core and HW scheduler round robin between them. + +config EZNPS_MEM_ERROR_ALIGN + bool "ARC-EZchip Memory error as an exception" + depends on EZNPS_MTM_EXT + default n + help + On the real chip of the NPS, user memory errors are handled + as a machine check exception, which is fatal, whereas on + simulator platform for NPS, is handled as a Level 2 interrupt + (just a stock ARC700) which is recoverable. This option makes + simulator behave like hardware. + +config EZNPS_SHARED_AUX_REGS + bool "ARC-EZchip Shared Auxiliary Registers Per Core" + depends on ARC_PLAT_EZNPS + default y + help + On the real chip of the NPS, auxiliary registers are shared between + all the cpus of the core, whereas on simulator platform for NPS, + each cpu has a different set of auxiliary registers. Configuration + should be unset if auxiliary registers are not shared between the cpus + of the core, so there will be a need to initialize them per cpu. diff --git a/arch/arc/plat-eznps/Makefile b/arch/arc/plat-eznps/Makefile index 21091b199df0..8d4371706b2f 100644 --- a/arch/arc/plat-eznps/Makefile +++ b/arch/arc/plat-eznps/Makefile @@ -2,6 +2,6 @@ # Makefile for the linux kernel. # -obj-y := entry.o platform.o +obj-y := entry.o platform.o ctop.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_EZNPS_MTM_EXT) += mtm.o diff --git a/arch/arc/plat-eznps/ctop.c b/arch/arc/plat-eznps/ctop.c new file mode 100644 index 000000000000..030bcd070a1b --- /dev/null +++ b/arch/arc/plat-eznps/ctop.c @@ -0,0 +1,32 @@ +/* + * Copyright(c) 2015 EZchip Technologies. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + */ + +#include <linux/sched.h> +#include <asm/processor.h> +#include <plat/ctop.h> + +void dp_save_restore(struct task_struct *prev, struct task_struct *next) +{ + struct eznps_dp *prev_task_dp = &prev->thread.dp; + struct eznps_dp *next_task_dp = &next->thread.dp; + + /* Here we save all Data Plane related auxiliary registers */ + prev_task_dp->eflags = read_aux_reg(CTOP_AUX_EFLAGS); + write_aux_reg(CTOP_AUX_EFLAGS, next_task_dp->eflags); + + prev_task_dp->gpa1 = read_aux_reg(CTOP_AUX_GPA1); + write_aux_reg(CTOP_AUX_GPA1, next_task_dp->gpa1); +} diff --git a/arch/arc/plat-eznps/entry.S b/arch/arc/plat-eznps/entry.S index 328261c27cda..091c92c32ab6 100644 --- a/arch/arc/plat-eznps/entry.S +++ b/arch/arc/plat-eznps/entry.S @@ -27,7 +27,7 @@ .align 1024 ; HW requierment for restart first PC ENTRY(res_service) -#ifdef CONFIG_EZNPS_MTM_EXT +#if defined(CONFIG_EZNPS_MTM_EXT) && defined(CONFIG_EZNPS_SHARED_AUX_REGS) ; There is no work for HW thread id != 0 lr r3, [CTOP_AUX_THREAD_ID] cmp r3, 0 diff --git a/arch/arc/plat-eznps/include/plat/ctop.h b/arch/arc/plat-eznps/include/plat/ctop.h index ee2e32df5e90..0c7d11022d0f 100644 --- a/arch/arc/plat-eznps/include/plat/ctop.h +++ b/arch/arc/plat-eznps/include/plat/ctop.h @@ -39,6 +39,7 @@ #define CTOP_AUX_LOGIC_CORE_ID (CTOP_AUX_BASE + 0x018) #define CTOP_AUX_MT_CTRL (CTOP_AUX_BASE + 0x020) #define CTOP_AUX_HW_COMPLY (CTOP_AUX_BASE + 0x024) +#define CTOP_AUX_DPC (CTOP_AUX_BASE + 0x02C) #define CTOP_AUX_LPC (CTOP_AUX_BASE + 0x030) #define CTOP_AUX_EFLAGS (CTOP_AUX_BASE + 0x080) #define CTOP_AUX_IACK (CTOP_AUX_BASE + 0x088) @@ -46,6 +47,7 @@ #define CTOP_AUX_UDMC (CTOP_AUX_BASE + 0x300) /* EZchip core instructions */ +#define CTOP_INST_HWSCHD_WFT_IE12 0x3E6F7344 #define CTOP_INST_HWSCHD_OFF_R4 0x3C6F00BF #define CTOP_INST_HWSCHD_RESTORE_R4 0x3E6F7103 #define CTOP_INST_SCHD_RW 0x3E6F7004 diff --git a/arch/arc/plat-eznps/mtm.c b/arch/arc/plat-eznps/mtm.c index aaaaffd3d940..2388de3d09ef 100644 --- a/arch/arc/plat-eznps/mtm.c +++ b/arch/arc/plat-eznps/mtm.c @@ -21,10 +21,22 @@ #include <plat/mtm.h> #include <plat/smp.h> -#define MT_CTRL_HS_CNT 0xFF +#define MT_HS_CNT_MIN 0x01 +#define MT_HS_CNT_MAX 0xFF #define MT_CTRL_ST_CNT 0xF #define NPS_NUM_HW_THREADS 0x10 +static int mtm_hs_ctr = MT_HS_CNT_MAX; + +#ifdef CONFIG_EZNPS_MEM_ERROR_ALIGN +int do_memory_error(unsigned long address, struct pt_regs *regs) +{ + die("Invalid Mem Access", regs, address); + + return 1; +} +#endif + static void mtm_init_nat(int cpu) { struct nps_host_reg_mtm_cfg mtm_cfg; @@ -98,6 +110,18 @@ void mtm_enable_core(unsigned int cpu) int i; struct nps_host_reg_aux_mt_ctrl mt_ctrl; struct nps_host_reg_mtm_cfg mtm_cfg; + struct nps_host_reg_aux_dpc dpc; + + /* + * Initializing dpc register in each CPU. + * Overwriting the init value of the DPC + * register so that CMEM and FMT virtual address + * spaces are accessible, and Data Plane HW + * facilities are enabled. + */ + dpc.ien = 1; + dpc.men = 1; + write_aux_reg(CTOP_AUX_DPC, dpc.value); if (NPS_CPU_TO_THREAD_NUM(cpu) != 0) return; @@ -118,9 +142,7 @@ void mtm_enable_core(unsigned int cpu) /* Enable HW schedule, stall counter, mtm */ mt_ctrl.value = 0; mt_ctrl.hsen = 1; - mt_ctrl.hs_cnt = MT_CTRL_HS_CNT; - mt_ctrl.sten = 1; - mt_ctrl.st_cnt = MT_CTRL_ST_CNT; + mt_ctrl.hs_cnt = mtm_hs_ctr; mt_ctrl.mten = 1; write_aux_reg(CTOP_AUX_MT_CTRL, mt_ctrl.value); @@ -131,3 +153,23 @@ void mtm_enable_core(unsigned int cpu) */ cpu_relax(); } + +/* Verify and set the value of the mtm hs counter */ +static int __init set_mtm_hs_ctr(char *ctr_str) +{ + long hs_ctr; + int ret; + + ret = kstrtol(ctr_str, 0, &hs_ctr); + + if (ret || hs_ctr > MT_HS_CNT_MAX || hs_ctr < MT_HS_CNT_MIN) { + pr_err("** Invalid @nps_mtm_hs_ctr [%d] needs to be [%d:%d] (incl)\n", + hs_ctr, MT_HS_CNT_MIN, MT_HS_CNT_MAX); + return -EINVAL; + } + + mtm_hs_ctr = hs_ctr; + + return 0; +} +early_param("nps_mtm_hs_ctr", set_mtm_hs_ctr); diff --git a/arch/arc/plat-hsdk/Kconfig b/arch/arc/plat-hsdk/Kconfig new file mode 100644 index 000000000000..5a6ed5afb009 --- /dev/null +++ b/arch/arc/plat-hsdk/Kconfig @@ -0,0 +1,9 @@ +# Copyright (C) 2017 Synopsys, Inc. (www.synopsys.com) +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# + +menuconfig ARC_SOC_HSDK + bool "ARC HS Development Kit SOC" diff --git a/arch/arc/plat-hsdk/Makefile b/arch/arc/plat-hsdk/Makefile new file mode 100644 index 000000000000..9a50c511a672 --- /dev/null +++ b/arch/arc/plat-hsdk/Makefile @@ -0,0 +1,9 @@ +# +# Copyright (C) 2017 Synopsys, Inc. (www.synopsys.com) +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# + +obj-y := platform.o diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c new file mode 100644 index 000000000000..a2e7fd17e36d --- /dev/null +++ b/arch/arc/plat-hsdk/platform.c @@ -0,0 +1,66 @@ +/* + * ARC HSDK Platform support code + * + * Copyright (C) 2017 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/smp.h> +#include <asm/arcregs.h> +#include <asm/io.h> +#include <asm/mach_desc.h> + +#define ARC_CCM_UNUSED_ADDR 0x60000000 + +static void __init hsdk_init_per_cpu(unsigned int cpu) +{ + /* + * By default ICCM is mapped to 0x7z while this area is used for + * kernel virtual mappings, so move it to currently unused area. + */ + if (cpuinfo_arc700[cpu].iccm.sz) + write_aux_reg(ARC_REG_AUX_ICCM, ARC_CCM_UNUSED_ADDR); + + /* + * By default DCCM is mapped to 0x8z while this area is used by kernel, + * so move it to currently unused area. + */ + if (cpuinfo_arc700[cpu].dccm.sz) + write_aux_reg(ARC_REG_AUX_DCCM, ARC_CCM_UNUSED_ADDR); +} + +#define ARC_PERIPHERAL_BASE 0xf0000000 +#define CREG_BASE (ARC_PERIPHERAL_BASE + 0x1000) +#define CREG_PAE (CREG_BASE + 0x180) +#define CREG_PAE_UPDATE (CREG_BASE + 0x194) + +static void __init hsdk_init_early(void) +{ + /* + * PAE remapping for DMA clients does not work due to an RTL bug, so + * CREG_PAE register must be programmed to all zeroes, otherwise it + * will cause problems with DMA to/from peripherals even if PAE40 is + * not used. + */ + + /* Default is 1, which means "PAE offset = 4GByte" */ + writel_relaxed(0, (void __iomem *) CREG_PAE); + + /* Really apply settings made above */ + writel(1, (void __iomem *) CREG_PAE_UPDATE); +} + +static const char *hsdk_compat[] __initconst = { + "snps,hsdk", + NULL, +}; + +MACHINE_START(SIMULATION, "hsdk") + .dt_compat = hsdk_compat, + .init_early = hsdk_init_early, + .init_per_cpu = hsdk_init_per_cpu, +MACHINE_END diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index ebf020b02bc8..c8781450905b 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -227,7 +227,6 @@ #define HSR_DABT_S1PTW (_AC(1, UL) << 7) #define HSR_DABT_CM (_AC(1, UL) << 8) -#define HSR_DABT_EA (_AC(1, UL) << 9) #define kvm_arm_exception_type \ {0, "RESET" }, \ diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 9a8a45aaf19a..98089ffd91bb 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -149,11 +149,6 @@ static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu) return (kvm_vcpu_get_hsr(vcpu) & HSR_SRT_MASK) >> HSR_SRT_SHIFT; } -static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu) -{ - return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_EA; -} - static inline bool kvm_vcpu_dabt_iss1tw(struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_S1PTW; @@ -206,6 +201,25 @@ static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE; } +static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu) +{ + switch (kvm_vcpu_trap_get_fault_type(vcpu)) { + case FSC_SEA: + case FSC_SEA_TTW0: + case FSC_SEA_TTW1: + case FSC_SEA_TTW2: + case FSC_SEA_TTW3: + case FSC_SECC: + case FSC_SECC_TTW0: + case FSC_SECC_TTW1: + case FSC_SECC_TTW2: + case FSC_SECC_TTW3: + return true; + default: + return false; + } +} + static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK; diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 54442e375354..cf8bf6bf87c4 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -67,7 +67,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) { trace_kvm_wfx(*vcpu_pc(vcpu), true); vcpu->stat.wfe_exit_stat++; - kvm_vcpu_on_spin(vcpu); + kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu)); } else { trace_kvm_wfx(*vcpu_pc(vcpu), false); vcpu->stat.wfi_exit_stat++; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 69c56f7316c4..5b78ce16a87e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -238,8 +238,10 @@ linux,pci-domain = <0>; max-link-speed = <1>; msi-map = <0x0 &its 0x0 0x1000>; - phys = <&pcie_phy>; - phy-names = "pcie-phy"; + phys = <&pcie_phy 0>, <&pcie_phy 1>, + <&pcie_phy 2>, <&pcie_phy 3>; + phy-names = "pcie-phy-0", "pcie-phy-1", + "pcie-phy-2", "pcie-phy-3"; ranges = <0x83000000 0x0 0xfa000000 0x0 0xfa000000 0x0 0x1e00000 0x81000000 0x0 0xfbe00000 0x0 0xfbe00000 0x0 0x100000>; resets = <&cru SRST_PCIE_CORE>, <&cru SRST_PCIE_MGMT>, @@ -1295,7 +1297,7 @@ compatible = "rockchip,rk3399-pcie-phy"; clocks = <&cru SCLK_PCIEPHY_REF>; clock-names = "refclk"; - #phy-cells = <0>; + #phy-cells = <1>; resets = <&cru SRST_PCIEPHY>; reset-names = "phy"; status = "disabled"; diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index fe39e6841326..e5df3fce0008 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -188,11 +188,6 @@ static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; } -static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) -{ - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_EA); -} - static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) { return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW); @@ -240,6 +235,25 @@ static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE; } +static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) +{ + switch (kvm_vcpu_trap_get_fault_type(vcpu)) { + case FSC_SEA: + case FSC_SEA_TTW0: + case FSC_SEA_TTW1: + case FSC_SEA_TTW2: + case FSC_SEA_TTW3: + case FSC_SECC: + case FSC_SECC_TTW0: + case FSC_SECC_TTW1: + case FSC_SECC_TTW2: + case FSC_SECC_TTW3: + return true; + default: + return false; + } +} + static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) { u32 esr = kvm_vcpu_get_hsr(vcpu); diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index e2b7e4f9cc31..0e2ea1c78542 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -22,23 +22,6 @@ #include <linux/pci-ecam.h> #include <linux/slab.h> -/* - * Called after each bus is probed, but before its children are examined - */ -void pcibios_fixup_bus(struct pci_bus *bus) -{ - /* nothing to do, expected to be removed in the future */ -} - -/* - * We don't have to worry about legacy ISA devices, so nothing to do here - */ -resource_size_t pcibios_align_resource(void *data, const struct resource *res, - resource_size_t size, resource_size_t align) -{ - return res->start; -} - #ifdef CONFIG_ACPI /* * Try to assign the IRQ number when probing a new device diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 17d8a1677a0b..7debb74843a0 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -84,7 +84,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true); vcpu->stat.wfe_exit_stat++; - kvm_vcpu_on_spin(vcpu); + kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu)); } else { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false); vcpu->stat.wfi_exit_stat++; diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c index 116786d2e8e8..c77d508b7462 100644 --- a/arch/arm64/kvm/vgic-sys-reg-v3.c +++ b/arch/arm64/kvm/vgic-sys-reg-v3.c @@ -208,29 +208,12 @@ static void vgic_v3_access_apr_reg(struct kvm_vcpu *vcpu, static bool access_gic_aprn(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r, u8 apr) { - struct vgic_cpu *vgic_v3_cpu = &vcpu->arch.vgic_cpu; u8 idx = r->Op2 & 3; - /* - * num_pri_bits are initialized with HW supported values. - * We can rely safely on num_pri_bits even if VM has not - * restored ICC_CTLR_EL1 before restoring APnR registers. - */ - switch (vgic_v3_cpu->num_pri_bits) { - case 7: - vgic_v3_access_apr_reg(vcpu, p, apr, idx); - break; - case 6: - if (idx > 1) - goto err; - vgic_v3_access_apr_reg(vcpu, p, apr, idx); - break; - default: - if (idx > 0) - goto err; - vgic_v3_access_apr_reg(vcpu, p, apr, idx); - } + if (idx > vgic_v3_max_apr_idx(vcpu)) + goto err; + vgic_v3_access_apr_reg(vcpu, p, apr, idx); return true; err: if (!p->is_write) diff --git a/arch/cris/arch-v32/drivers/pci/bios.c b/arch/cris/arch-v32/drivers/pci/bios.c index 394c2a73d5e2..5cc622c0225e 100644 --- a/arch/cris/arch-v32/drivers/pci/bios.c +++ b/arch/cris/arch-v32/drivers/pci/bios.c @@ -2,10 +2,6 @@ #include <linux/kernel.h> #include <hwregs/intr_vect.h> -void pcibios_fixup_bus(struct pci_bus *b) -{ -} - void pcibios_set_master(struct pci_dev *dev) { u8 lat; diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 4068bde623dc..f5ec736100ee 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -411,13 +411,6 @@ pcibios_disable_device (struct pci_dev *dev) acpi_pci_irq_disable(dev); } -resource_size_t -pcibios_align_resource (void *data, const struct resource *res, - resource_size_t size, resource_size_t align) -{ - return res->start; -} - /** * ia64_pci_get_legacy_mem - generic legacy mem routine * @bus: bus to get legacy memory base address for diff --git a/arch/m68k/coldfire/pci.c b/arch/m68k/coldfire/pci.c index 6a640be48568..3097fa2ca746 100644 --- a/arch/m68k/coldfire/pci.c +++ b/arch/m68k/coldfire/pci.c @@ -243,6 +243,13 @@ static struct resource mcf_pci_io = { .flags = IORESOURCE_IO, }; +static struct resource busn_resource = { + .name = "PCI busn", + .start = 0, + .end = 255, + .flags = IORESOURCE_BUS, +}; + /* * Interrupt mapping and setting. */ @@ -258,6 +265,13 @@ static int mcf_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) static int __init mcf_pci_init(void) { + struct pci_host_bridge *bridge; + int ret; + + bridge = pci_alloc_host_bridge(0); + if (!bridge) + return -ENOMEM; + pr_info("ColdFire: PCI bus initialization...\n"); /* Reset the external PCI bus */ @@ -312,14 +326,28 @@ static int __init mcf_pci_init(void) set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(msecs_to_jiffies(200)); - rootbus = pci_scan_bus(0, &mcf_pci_ops, NULL); - if (!rootbus) - return -ENODEV; + + pci_add_resource(&bridge->windows, &ioport_resource); + pci_add_resource(&bridge->windows, &iomem_resource); + pci_add_resource(&bridge->windows, &busn_resource); + bridge->dev.parent = NULL; + bridge->sysdata = NULL; + bridge->busnr = 0; + bridge->ops = &mcf_pci_ops; + bridge->swizzle_irq = pci_common_swizzle; + bridge->map_irq = mcf_pci_map_irq; + + ret = pci_scan_root_bus_bridge(bridge); + if (ret) { + pci_free_host_bridge(bridge); + return ret; + } + + rootbus = bridge->bus; rootbus->resource[0] = &mcf_pci_io; rootbus->resource[1] = &mcf_pci_mem; - pci_fixup_irqs(pci_common_swizzle, mcf_pci_map_irq); pci_bus_size_bridges(rootbus); pci_bus_assign_resources(rootbus); pci_bus_add_devices(rootbus); diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h index efd4983cb697..114b93488193 100644 --- a/arch/microblaze/include/asm/pci.h +++ b/arch/microblaze/include/asm/pci.h @@ -81,9 +81,6 @@ extern pgprot_t pci_phys_mem_access_prot(struct file *file, #define HAVE_ARCH_PCI_RESOURCE_TO_USER -extern void pcibios_setup_bus_devices(struct pci_bus *bus); -extern void pcibios_setup_bus_self(struct pci_bus *bus); - /* This part of code was originally in xilinx-pci.h */ #ifdef CONFIG_PCI_XILINX extern void __init xilinx_pci_init(void); diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c index 180f4755ca66..ae79e8638d50 100644 --- a/arch/microblaze/pci/pci-common.c +++ b/arch/microblaze/pci/pci-common.c @@ -678,144 +678,6 @@ static void pcibios_fixup_resources(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_resources); -/* This function tries to figure out if a bridge resource has been initialized - * by the firmware or not. It doesn't have to be absolutely bullet proof, but - * things go more smoothly when it gets it right. It should covers cases such - * as Apple "closed" bridge resources and bare-metal pSeries unassigned bridges - */ -static int pcibios_uninitialized_bridge_resource(struct pci_bus *bus, - struct resource *res) -{ - struct pci_controller *hose = pci_bus_to_host(bus); - struct pci_dev *dev = bus->self; - resource_size_t offset; - u16 command; - int i; - - /* Job is a bit different between memory and IO */ - if (res->flags & IORESOURCE_MEM) { - /* If the BAR is non-0 (res != pci_mem_offset) then it's - * probably been initialized by somebody - */ - if (res->start != hose->pci_mem_offset) - return 0; - - /* The BAR is 0, let's check if memory decoding is enabled on - * the bridge. If not, we consider it unassigned - */ - pci_read_config_word(dev, PCI_COMMAND, &command); - if ((command & PCI_COMMAND_MEMORY) == 0) - return 1; - - /* Memory decoding is enabled and the BAR is 0. If any of - * the bridge resources covers that starting address (0 then - * it's good enough for us for memory - */ - for (i = 0; i < 3; i++) { - if ((hose->mem_resources[i].flags & IORESOURCE_MEM) && - hose->mem_resources[i].start == hose->pci_mem_offset) - return 0; - } - - /* Well, it starts at 0 and we know it will collide so we may as - * well consider it as unassigned. That covers the Apple case. - */ - return 1; - } else { - /* If the BAR is non-0, then we consider it assigned */ - offset = (unsigned long)hose->io_base_virt - _IO_BASE; - if (((res->start - offset) & 0xfffffffful) != 0) - return 0; - - /* Here, we are a bit different than memory as typically IO - * space starting at low addresses -is- valid. What we do - * instead if that we consider as unassigned anything that - * doesn't have IO enabled in the PCI command register, - * and that's it. - */ - pci_read_config_word(dev, PCI_COMMAND, &command); - if (command & PCI_COMMAND_IO) - return 0; - - /* It's starting at 0 and IO is disabled in the bridge, consider - * it unassigned - */ - return 1; - } -} - -/* Fixup resources of a PCI<->PCI bridge */ -static void pcibios_fixup_bridge(struct pci_bus *bus) -{ - struct resource *res; - int i; - - struct pci_dev *dev = bus->self; - - pci_bus_for_each_resource(bus, res, i) { - if (!res) - continue; - if (!res->flags) - continue; - if (i >= 3 && bus->self->transparent) - continue; - - pr_debug("PCI:%s Bus rsrc %d %016llx-%016llx [%x] fixup...\n", - pci_name(dev), i, - (unsigned long long)res->start, - (unsigned long long)res->end, - (unsigned int)res->flags); - - /* Try to detect uninitialized P2P bridge resources, - * and clear them out so they get re-assigned later - */ - if (pcibios_uninitialized_bridge_resource(bus, res)) { - res->flags = 0; - pr_debug("PCI:%s (unassigned)\n", - pci_name(dev)); - } else { - pr_debug("PCI:%s %016llx-%016llx\n", - pci_name(dev), - (unsigned long long)res->start, - (unsigned long long)res->end); - } - } -} - -void pcibios_setup_bus_self(struct pci_bus *bus) -{ - /* Fix up the bus resources for P2P bridges */ - if (bus->self != NULL) - pcibios_fixup_bridge(bus); -} - -void pcibios_setup_bus_devices(struct pci_bus *bus) -{ - struct pci_dev *dev; - - pr_debug("PCI: Fixup bus devices %d (%s)\n", - bus->number, bus->self ? pci_name(bus->self) : "PHB"); - - list_for_each_entry(dev, &bus->devices, bus_list) { - /* Setup OF node pointer in archdata */ - dev->dev.of_node = pci_device_to_OF_node(dev); - - /* Fixup NUMA node as it may not be setup yet by the generic - * code and is needed by the DMA init - */ - set_dev_node(&dev->dev, pcibus_to_node(dev->bus)); - - /* Read default IRQs and fixup if necessary */ - dev->irq = of_irq_parse_and_map_pci(dev, 0, 0); - } -} - -void pcibios_fixup_bus(struct pci_bus *bus) -{ - /* nothing to do */ -} -EXPORT_SYMBOL(pcibios_fixup_bus); - /* * We need to avoid collisions with `mirrored' VGA ports * and other strange ISA hardware, so we always want the @@ -829,13 +691,6 @@ EXPORT_SYMBOL(pcibios_fixup_bus); * but we want to try to avoid allocating at 0x2900-0x2bff * which might have be mirrored at 0x0100-0x03ff.. */ -resource_size_t pcibios_align_resource(void *data, const struct resource *res, - resource_size_t size, resource_size_t align) -{ - return res->start; -} -EXPORT_SYMBOL(pcibios_align_resource); - int pcibios_add_device(struct pci_dev *dev) { dev->irq = of_irq_parse_and_map_pci(dev, 0, 0); diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index d4b2ad18eef2..bce2a6431430 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -98,6 +98,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return !!(vcpu->arch.pending_exceptions); } +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) +{ + return false; +} + int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return 1; diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c index 174575a9a112..fc7726088103 100644 --- a/arch/mips/pci/pci-legacy.c +++ b/arch/mips/pci/pci-legacy.c @@ -78,6 +78,12 @@ static void pcibios_scanbus(struct pci_controller *hose) static int need_domain_info; LIST_HEAD(resources); struct pci_bus *bus; + struct pci_host_bridge *bridge; + int ret; + + bridge = pci_alloc_host_bridge(0); + if (!bridge) + return; if (hose->get_busno && pci_has_flag(PCI_PROBE_ONLY)) next_busno = (*hose->get_busno)(); @@ -87,18 +93,24 @@ static void pcibios_scanbus(struct pci_controller *hose) pci_add_resource_offset(&resources, hose->io_resource, hose->io_offset); pci_add_resource(&resources, hose->busn_resource); - bus = pci_scan_root_bus(NULL, next_busno, hose->pci_ops, hose, - &resources); - hose->bus = bus; + list_splice_init(&resources, &bridge->windows); + bridge->dev.parent = NULL; + bridge->sysdata = hose; + bridge->busnr = next_busno; + bridge->ops = hose->pci_ops; + bridge->swizzle_irq = pci_common_swizzle; + bridge->map_irq = pcibios_map_irq; + ret = pci_scan_root_bus_bridge(bridge); + if (ret) { + pci_free_host_bridge(bridge); + return; + } + + hose->bus = bus = bridge->bus; need_domain_info = need_domain_info || pci_domain_nr(bus); set_pci_need_domain_info(hose, need_domain_info); - if (!bus) { - pci_free_resource_list(&resources); - return; - } - next_busno = bus->busn_res.end + 1; /* Don't allow 8-bit bus number overflow inside the hose - reserve some space for bridges. */ @@ -224,8 +236,6 @@ static int __init pcibios_init(void) list_for_each_entry(hose, &controllers, list) pcibios_scanbus(hose); - pci_fixup_irqs(pci_common_swizzle, pcibios_map_irq); - pci_initialized = 1; return 0; diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index f28d21c69f79..508275bb05d5 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -104,6 +104,7 @@ #define HPTE_R_C ASM_CONST(0x0000000000000080) #define HPTE_R_R ASM_CONST(0x0000000000000100) #define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00) +#define HPTE_R_KEY (HPTE_R_KEY_LO | HPTE_R_KEY_HI) #define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000) #define HPTE_V_VRMA_MASK ASM_CONST(0x4001ffffff000000) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 67075e065ef2..7c62967d672c 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1941,6 +1941,7 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf) rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY; ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC); if (ret < 0) { + kfree(ctx); kvm_put_kvm(kvm); return ret; } diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 53766e2bc029..8f2da8bba737 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -265,8 +265,11 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) { struct kvmppc_spapr_tce_table *stt = filp->private_data; struct kvmppc_spapr_tce_iommu_table *stit, *tmp; + struct kvm *kvm = stt->kvm; + mutex_lock(&kvm->lock); list_del_rcu(&stt->list); + mutex_unlock(&kvm->lock); list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) { WARN_ON(!kref_read(&stit->kref)); @@ -298,7 +301,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, unsigned long npages, size; int ret = -ENOMEM; int i; - int fd = -1; if (!args->size) return -EINVAL; @@ -328,11 +330,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, goto fail; } - ret = fd = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, - stt, O_RDWR | O_CLOEXEC); - if (ret < 0) - goto fail; - mutex_lock(&kvm->lock); /* Check this LIOBN hasn't been previously allocated */ @@ -344,17 +341,19 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, } } - if (!ret) { + if (!ret) + ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, + stt, O_RDWR | O_CLOEXEC); + + if (ret >= 0) { list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); kvm_get_kvm(kvm); } mutex_unlock(&kvm->lock); - if (!ret) - return fd; - - put_unused_fd(fd); + if (ret >= 0) + return ret; fail: for (i = 0; i < npages; i++) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index ebcf97cb5c98..18e974a34fce 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -485,7 +485,13 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, switch (subfunc) { case H_VPA_REG_VPA: /* register VPA */ - if (len < sizeof(struct lppaca)) + /* + * The size of our lppaca is 1kB because of the way we align + * it for the guest to avoid crossing a 4kB boundary. We only + * use 640 bytes of the structure though, so we should accept + * clients that set a size of 640. + */ + if (len < 640) break; vpap = &tvcpu->arch.vpa; err = 0; @@ -3336,6 +3342,14 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm, if (radix_enabled()) return -EINVAL; + /* + * POWER7, POWER8 and POWER9 all support 32 storage keys for data. + * POWER7 doesn't support keys for instruction accesses, + * POWER8 and POWER9 do. + */ + info->data_keys = 32; + info->instr_keys = cpu_has_feature(CPU_FTR_ARCH_207S) ? 32 : 0; + info->flags = KVM_PPC_PAGE_SIZES_REAL; if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) info->flags |= KVM_PPC_1T_SEGMENTS; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index fedb0139524c..4efe364f1188 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -269,7 +269,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, if (!realmode) local_irq_restore(irq_flags); - ptel &= ~(HPTE_R_PP0 - psize); + ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1); ptel |= pa; if (pa) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 2259b6cde119..663a4a861e7f 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -982,7 +982,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) #ifdef CONFIG_KVM_XICS /* We are entering the guest on that thread, push VCPU to XIVE */ ld r10, HSTATE_XIVE_TIMA_PHYS(r13) - cmpldi cr0, r10, r0 + cmpldi cr0, r10, 0 beq no_xive ld r11, VCPU_XIVE_SAVED_STATE(r4) li r9, TM_QW1_OS @@ -1286,7 +1286,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER bne 2f mfspr r3,SPRN_HDEC - cmpwi r3,0 + EXTEND_HDEC(r3) + cmpdi r3,0 mr r4,r9 bge fast_guest_return 2: diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 32fdab57d604..f9f6468f4171 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -455,16 +455,20 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm, if (err) goto free_vcpu; - if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) + if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) { + err = -ENOMEM; goto uninit_vcpu; + } err = kvmppc_e500_tlb_init(vcpu_e500); if (err) goto uninit_id; vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO); - if (!vcpu->arch.shared) + if (!vcpu->arch.shared) { + err = -ENOMEM; goto uninit_tlb; + } return vcpu; diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index f48a0c22e8f9..d0b6b5788afc 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -331,8 +331,10 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm, goto uninit_vcpu; vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - if (!vcpu->arch.shared) + if (!vcpu->arch.shared) { + err = -ENOMEM; goto uninit_tlb; + } return vcpu; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 1a75c0b5f4ca..3480faaf1ef8 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -58,6 +58,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) return !!(v->arch.pending_exceptions) || kvm_request_pending(v); } +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) +{ + return false; +} + int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return 1; diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index a409d5991934..51375e766e90 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -226,7 +226,9 @@ struct kvm_s390_sie_block { #define ECB3_RI 0x01 __u8 ecb3; /* 0x0063 */ __u32 scaol; /* 0x0064 */ - __u8 reserved68[4]; /* 0x0068 */ + __u8 reserved68; /* 0x0068 */ + __u8 epdx; /* 0x0069 */ + __u8 reserved6a[2]; /* 0x006a */ __u32 todpr; /* 0x006c */ __u8 reserved70[16]; /* 0x0070 */ __u64 mso; /* 0x0080 */ @@ -265,6 +267,7 @@ struct kvm_s390_sie_block { __u64 cbrlo; /* 0x01b8 */ __u8 reserved1c0[8]; /* 0x01c0 */ #define ECD_HOSTREGMGMT 0x20000000 +#define ECD_MEF 0x08000000 __u32 ecd; /* 0x01c8 */ __u8 reserved1cc[18]; /* 0x01cc */ __u64 pp; /* 0x01de */ @@ -739,6 +742,7 @@ struct kvm_arch{ struct kvm_s390_cpu_model model; struct kvm_s390_crypto crypto; struct kvm_s390_vsie vsie; + u8 epdx; u64 epoch; struct kvm_s390_migration_state *migration_state; /* subset of available cpu features enabled by user space */ diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h index ca21b28a7b17..22b0f49e87c1 100644 --- a/arch/s390/include/asm/page-states.h +++ b/arch/s390/include/asm/page-states.h @@ -15,6 +15,6 @@ #define ESSA_SET_STABLE_IF_RESIDENT 6 #define ESSA_SET_STABLE_NODAT 7 -#define ESSA_MAX ESSA_SET_STABLE_IF_RESIDENT +#define ESSA_MAX ESSA_SET_STABLE_NODAT #endif diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 69d09c39bbcd..cd7359e23d86 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -88,6 +88,12 @@ struct kvm_s390_io_adapter_req { /* kvm attributes for KVM_S390_VM_TOD */ #define KVM_S390_VM_TOD_LOW 0 #define KVM_S390_VM_TOD_HIGH 1 +#define KVM_S390_VM_TOD_EXT 2 + +struct kvm_s390_vm_tod_clock { + __u8 epoch_idx; + __u64 tod; +}; /* kvm attributes for KVM_S390_VM_CPU_MODEL */ /* processor related attributes are r/w */ diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index e4d36094aceb..d93a2c0474bf 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -150,7 +150,7 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); vcpu->stat.diagnose_44++; - kvm_vcpu_on_spin(vcpu); + kvm_vcpu_on_spin(vcpu, true); return 0; } diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index c2e0ddc1356e..bcbd86621d01 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -308,7 +308,7 @@ static inline int in_addr_range(u64 addr, u64 a, u64 b) return (addr >= a) && (addr <= b); else /* "overflowing" interval */ - return (addr <= a) && (addr >= b); + return (addr >= a) || (addr <= b); } #define end_of_range(bp_info) (bp_info->addr + bp_info->len - 1) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index a619ddae610d..a832ad031cee 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2479,6 +2479,7 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu, struct kvm_s390_mchk_info *mchk; union mci mci; __u64 cr14 = 0; /* upper bits are not used */ + int rc; mci.val = mcck_info->mcic; if (mci.sr) @@ -2496,12 +2497,13 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu, if (mci.ck) { /* Inject the floating machine check */ inti.type = KVM_S390_MCHK; - WARN_ON_ONCE(__inject_vm(vcpu->kvm, &inti)); + rc = __inject_vm(vcpu->kvm, &inti); } else { /* Inject the machine check to specified vcpu */ irq.type = KVM_S390_MCHK; - WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); + rc = kvm_s390_inject_vcpu(vcpu, &irq); } + WARN_ON_ONCE(rc); } int kvm_set_routing_entry(struct kvm *kvm, diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index af09d3437631..40d0a1a97889 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -130,6 +130,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; +struct kvm_s390_tod_clock_ext { + __u8 epoch_idx; + __u64 tod; + __u8 reserved[7]; +} __packed; + /* allow nested virtualization in KVM (if enabled by user space) */ static int nested; module_param(nested, int, S_IRUGO); @@ -874,6 +880,26 @@ static int kvm_s390_vm_get_migration(struct kvm *kvm, return 0; } +static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) +{ + struct kvm_s390_vm_tod_clock gtod; + + if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod))) + return -EFAULT; + + if (test_kvm_facility(kvm, 139)) + kvm_s390_set_tod_clock_ext(kvm, >od); + else if (gtod.epoch_idx == 0) + kvm_s390_set_tod_clock(kvm, gtod.tod); + else + return -EINVAL; + + VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx", + gtod.epoch_idx, gtod.tod); + + return 0; +} + static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) { u8 gtod_high; @@ -909,6 +935,9 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) return -EINVAL; switch (attr->attr) { + case KVM_S390_VM_TOD_EXT: + ret = kvm_s390_set_tod_ext(kvm, attr); + break; case KVM_S390_VM_TOD_HIGH: ret = kvm_s390_set_tod_high(kvm, attr); break; @@ -922,6 +951,43 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) return ret; } +static void kvm_s390_get_tod_clock_ext(struct kvm *kvm, + struct kvm_s390_vm_tod_clock *gtod) +{ + struct kvm_s390_tod_clock_ext htod; + + preempt_disable(); + + get_tod_clock_ext((char *)&htod); + + gtod->tod = htod.tod + kvm->arch.epoch; + gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx; + + if (gtod->tod < htod.tod) + gtod->epoch_idx += 1; + + preempt_enable(); +} + +static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) +{ + struct kvm_s390_vm_tod_clock gtod; + + memset(>od, 0, sizeof(gtod)); + + if (test_kvm_facility(kvm, 139)) + kvm_s390_get_tod_clock_ext(kvm, >od); + else + gtod.tod = kvm_s390_get_tod_clock_fast(kvm); + + if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod))) + return -EFAULT; + + VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx", + gtod.epoch_idx, gtod.tod); + return 0; +} + static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) { u8 gtod_high = 0; @@ -954,6 +1020,9 @@ static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr) return -EINVAL; switch (attr->attr) { + case KVM_S390_VM_TOD_EXT: + ret = kvm_s390_get_tod_ext(kvm, attr); + break; case KVM_S390_VM_TOD_HIGH: ret = kvm_s390_get_tod_high(kvm, attr); break; @@ -1505,7 +1574,7 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm, if (r < 0) pgstev = 0; /* save the value */ - res[i++] = (pgstev >> 24) & 0x3; + res[i++] = (pgstev >> 24) & 0x43; /* * if the next bit is too far away, stop. * if we reached the previous "next", find the next one @@ -1583,7 +1652,7 @@ static int kvm_s390_set_cmma_bits(struct kvm *kvm, pgstev = bits[i]; pgstev = pgstev << 24; - mask &= _PGSTE_GPS_USAGE_MASK; + mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT; set_pgste_bits(kvm->mm, hva, mask, pgstev); } srcu_read_unlock(&kvm->srcu, srcu_idx); @@ -1858,8 +1927,16 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask, S390_ARCH_FAC_LIST_SIZE_BYTE); + /* we are always in czam mode - even on pre z14 machines */ + set_kvm_facility(kvm->arch.model.fac_mask, 138); + set_kvm_facility(kvm->arch.model.fac_list, 138); + /* we emulate STHYI in kvm */ set_kvm_facility(kvm->arch.model.fac_mask, 74); set_kvm_facility(kvm->arch.model.fac_list, 74); + if (MACHINE_HAS_TLB_GUEST) { + set_kvm_facility(kvm->arch.model.fac_mask, 147); + set_kvm_facility(kvm->arch.model.fac_list, 147); + } kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); kvm->arch.model.ibc = sclp.ibc & 0x0fff; @@ -2369,6 +2446,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->eca |= ECA_VX; vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; } + if (test_kvm_facility(vcpu->kvm, 139)) + vcpu->arch.sie_block->ecd |= ECD_MEF; + vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) | SDNXC; vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; @@ -2447,6 +2527,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return kvm_s390_vcpu_has_irq(vcpu, 0); } +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) +{ + return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE); +} + void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) { atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); @@ -2855,6 +2940,35 @@ retry: return 0; } +void kvm_s390_set_tod_clock_ext(struct kvm *kvm, + const struct kvm_s390_vm_tod_clock *gtod) +{ + struct kvm_vcpu *vcpu; + struct kvm_s390_tod_clock_ext htod; + int i; + + mutex_lock(&kvm->lock); + preempt_disable(); + + get_tod_clock_ext((char *)&htod); + + kvm->arch.epoch = gtod->tod - htod.tod; + kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx; + + if (kvm->arch.epoch > gtod->tod) + kvm->arch.epdx -= 1; + + kvm_s390_vcpu_block_all(kvm); + kvm_for_each_vcpu(i, vcpu, kvm) { + vcpu->arch.sie_block->epoch = kvm->arch.epoch; + vcpu->arch.sie_block->epdx = kvm->arch.epdx; + } + + kvm_s390_vcpu_unblock_all(kvm); + preempt_enable(); + mutex_unlock(&kvm->lock); +} + void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod) { struct kvm_vcpu *vcpu; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 6fedc8bc7a37..9f8fdd7b2311 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -272,6 +272,8 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); int handle_sthyi(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ +void kvm_s390_set_tod_clock_ext(struct kvm *kvm, + const struct kvm_s390_vm_tod_clock *gtod); void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod); long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 785ad028bde6..c954ac49eee4 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -988,6 +988,8 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc) if (pgstev & _PGSTE_GPS_ZERO) res |= 1; } + if (pgstev & _PGSTE_GPS_NODAT) + res |= 0x20; vcpu->run->s.regs.gprs[r1] = res; /* * It is possible that all the normal 511 slots were full, in which case @@ -1027,7 +1029,9 @@ static int handle_essa(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); /* Check for invalid operation request code */ orc = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28; - if (orc > ESSA_MAX) + /* ORCs 0-6 are always valid */ + if (orc > (test_kvm_facility(vcpu->kvm, 147) ? ESSA_SET_STABLE_NODAT + : ESSA_SET_STABLE_IF_RESIDENT)) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); if (likely(!vcpu->kvm->arch.migration_state)) { diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 1a252f537081..9d592ef4104b 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -155,29 +155,26 @@ static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu, return rc; } -static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) +static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter, + u64 *status_reg) { - int rc; unsigned int i; struct kvm_vcpu *v; + bool all_stopped = true; - switch (parameter & 0xff) { - case 0: - rc = SIGP_CC_NOT_OPERATIONAL; - break; - case 1: - case 2: - kvm_for_each_vcpu(i, v, vcpu->kvm) { - v->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; - kvm_clear_async_pf_completion_queue(v); - } - - rc = SIGP_CC_ORDER_CODE_ACCEPTED; - break; - default: - rc = -EOPNOTSUPP; + kvm_for_each_vcpu(i, v, vcpu->kvm) { + if (v == vcpu) + continue; + if (!is_vcpu_stopped(v)) + all_stopped = false; } - return rc; + + *status_reg &= 0xffffffff00000000UL; + + /* Reject set arch order, with czam we're always in z/Arch mode. */ + *status_reg |= (all_stopped ? SIGP_STATUS_INVALID_PARAMETER : + SIGP_STATUS_INCORRECT_STATE); + return SIGP_CC_STATUS_STORED; } static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu, @@ -446,7 +443,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) switch (order_code) { case SIGP_SET_ARCHITECTURE: vcpu->stat.instruction_sigp_arch++; - rc = __sigp_set_arch(vcpu, parameter); + rc = __sigp_set_arch(vcpu, parameter, + &vcpu->run->s.regs.gprs[r1]); break; default: rc = handle_sigp_dst(vcpu, order_code, cpu_addr, diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c index a2e5c24f47a7..395926b8c1ed 100644 --- a/arch/s390/kvm/sthyi.c +++ b/arch/s390/kvm/sthyi.c @@ -436,14 +436,6 @@ int handle_sthyi(struct kvm_vcpu *vcpu) if (addr & ~PAGE_MASK) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - /* - * If the page has not yet been faulted in, we want to do that - * now and not after all the expensive calculations. - */ - r = write_guest(vcpu, addr, reg2, &cc, 1); - if (r) - return kvm_s390_inject_prog_cond(vcpu, r); - sctns = (void *)get_zeroed_page(GFP_KERNEL); if (!sctns) return -ENOMEM; diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index ba8203e4d516..b18b5652e5c5 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -349,6 +349,9 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) scb_s->eca |= scb_o->eca & ECA_IB; if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI)) scb_s->eca |= scb_o->eca & ECA_CEI; + /* Epoch Extension */ + if (test_kvm_facility(vcpu->kvm, 139)) + scb_s->ecd |= scb_o->ecd & ECD_MEF; prepare_ibc(vcpu, vsie_page); rc = shadow_crycb(vcpu, vsie_page); @@ -806,8 +809,6 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) { struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; - struct mcck_volatile_info *mcck_info; - struct sie_page *sie_page; int rc; handle_last_fault(vcpu, vsie_page); @@ -831,9 +832,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) if (rc == -EINTR) { VCPU_EVENT(vcpu, 3, "%s", "machine check"); - sie_page = container_of(scb_s, struct sie_page, sie_block); - mcck_info = &sie_page->mcck_info; - kvm_s390_reinject_machine_check(vcpu, mcck_info); + kvm_s390_reinject_machine_check(vcpu, &vsie_page->mcck_info); return 0; } @@ -919,6 +918,13 @@ static void register_shadow_scb(struct kvm_vcpu *vcpu, */ preempt_disable(); scb_s->epoch += vcpu->kvm->arch.epoch; + + if (scb_s->ecd & ECD_MEF) { + scb_s->epdx += vcpu->kvm->arch.epdx; + if (scb_s->epoch < vcpu->kvm->arch.epoch) + scb_s->epdx += 1; + } + preempt_enable(); } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 4198a71b8fdd..ae677f814bc0 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -919,7 +919,7 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, case ESSA_GET_STATE: break; case ESSA_SET_STABLE: - pgstev &= ~_PGSTE_GPS_USAGE_MASK; + pgstev &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); pgstev |= _PGSTE_GPS_USAGE_STABLE; break; case ESSA_SET_UNUSED: @@ -965,6 +965,10 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, pgstev |= _PGSTE_GPS_USAGE_STABLE; } break; + case ESSA_SET_STABLE_NODAT: + pgstev &= ~_PGSTE_GPS_USAGE_MASK; + pgstev |= _PGSTE_GPS_USAGE_STABLE | _PGSTE_GPS_NODAT; + break; default: /* we should never get here! */ break; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 7b30af5da222..ddb9923fb45d 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -262,10 +262,6 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len) return rc; } -void pcibios_fixup_bus(struct pci_bus *bus) -{ -} - resource_size_t pcibios_align_resource(void *data, const struct resource *res, resource_size_t size, resource_size_t align) diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index 29d72bf8ed2b..70dd8f17d054 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c @@ -83,6 +83,7 @@ static struct facility_def facility_defs[] = { 78, /* enhanced-DAT 2 */ 130, /* instruction-execution-protection */ 131, /* enhanced-SOP 2 and side-effect */ + 139, /* multiple epoch facility */ 146, /* msa extension 8 */ -1 /* END */ } diff --git a/arch/sh/drivers/pci/fixups-cayman.c b/arch/sh/drivers/pci/fixups-cayman.c index edc2fb7a5bb2..32467884d6f7 100644 --- a/arch/sh/drivers/pci/fixups-cayman.c +++ b/arch/sh/drivers/pci/fixups-cayman.c @@ -5,7 +5,7 @@ #include <cpu/irq.h> #include "pci-sh5.h" -int __init pcibios_map_platform_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_platform_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int result = -1; diff --git a/arch/sh/drivers/pci/fixups-dreamcast.c b/arch/sh/drivers/pci/fixups-dreamcast.c index 1d1c5a227e50..9d597f7ab8dd 100644 --- a/arch/sh/drivers/pci/fixups-dreamcast.c +++ b/arch/sh/drivers/pci/fixups-dreamcast.c @@ -76,7 +76,7 @@ static void gapspci_fixup_resources(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, gapspci_fixup_resources); -int __init pcibios_map_platform_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_platform_irq(const struct pci_dev *dev, u8 slot, u8 pin) { /* * The interrupt routing semantics here are quite trivial. diff --git a/arch/sh/drivers/pci/fixups-r7780rp.c b/arch/sh/drivers/pci/fixups-r7780rp.c index 57ed3f09d0c2..2c9b58f848dd 100644 --- a/arch/sh/drivers/pci/fixups-r7780rp.c +++ b/arch/sh/drivers/pci/fixups-r7780rp.c @@ -15,7 +15,7 @@ #include <linux/sh_intc.h> #include "pci-sh4.h" -int __init pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin) +int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin) { return evt2irq(0xa20) + slot; } diff --git a/arch/sh/drivers/pci/fixups-rts7751r2d.c b/arch/sh/drivers/pci/fixups-rts7751r2d.c index eaddb56c45c6..358ac104f08c 100644 --- a/arch/sh/drivers/pci/fixups-rts7751r2d.c +++ b/arch/sh/drivers/pci/fixups-rts7751r2d.c @@ -20,18 +20,18 @@ #define PCIMCR_MRSET_OFF 0xBFFFFFFF #define PCIMCR_RFSH_OFF 0xFFFFFFFB -static u8 rts7751r2d_irq_tab[] __initdata = { +static u8 rts7751r2d_irq_tab[] = { IRQ_PCI_INTA, IRQ_PCI_INTB, IRQ_PCI_INTC, IRQ_PCI_INTD, }; -static char lboxre2_irq_tab[] __initdata = { +static char lboxre2_irq_tab[] = { IRQ_ETH0, IRQ_ETH1, IRQ_INTA, IRQ_INTD, }; -int __init pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin) +int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin) { if (mach_is_lboxre2()) return lboxre2_irq_tab[slot]; diff --git a/arch/sh/drivers/pci/fixups-sdk7780.c b/arch/sh/drivers/pci/fixups-sdk7780.c index c0a015ae6ecf..24e96dfbdb22 100644 --- a/arch/sh/drivers/pci/fixups-sdk7780.c +++ b/arch/sh/drivers/pci/fixups-sdk7780.c @@ -22,7 +22,7 @@ #define IRQ_INTD evt2irq(0xa80) /* IDSEL [16][17][18][19][20][21][22][23][24][25][26][27][28][29][30][31] */ -static char sdk7780_irq_tab[4][16] __initdata = { +static char sdk7780_irq_tab[4][16] = { /* INTA */ { IRQ_INTA, IRQ_INTD, IRQ_INTC, IRQ_INTD, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, @@ -37,7 +37,7 @@ static char sdk7780_irq_tab[4][16] __initdata = { -1, -1, -1 }, }; -int __init pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin) +int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin) { return sdk7780_irq_tab[pin-1][slot]; } diff --git a/arch/sh/drivers/pci/fixups-se7751.c b/arch/sh/drivers/pci/fixups-se7751.c index 84a88ca92008..1cb8d0ac4fdb 100644 --- a/arch/sh/drivers/pci/fixups-se7751.c +++ b/arch/sh/drivers/pci/fixups-se7751.c @@ -7,7 +7,7 @@ #include <linux/sh_intc.h> #include "pci-sh4.h" -int __init pcibios_map_platform_irq(const struct pci_dev *, u8 slot, u8 pin) +int pcibios_map_platform_irq(const struct pci_dev *, u8 slot, u8 pin) { switch (slot) { case 0: return evt2irq(0x3a0); diff --git a/arch/sh/drivers/pci/fixups-sh03.c b/arch/sh/drivers/pci/fixups-sh03.c index 16207bef9f52..55ac1ba2c74f 100644 --- a/arch/sh/drivers/pci/fixups-sh03.c +++ b/arch/sh/drivers/pci/fixups-sh03.c @@ -4,7 +4,7 @@ #include <linux/pci.h> #include <linux/sh_intc.h> -int __init pcibios_map_platform_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_platform_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int irq; diff --git a/arch/sh/drivers/pci/fixups-snapgear.c b/arch/sh/drivers/pci/fixups-snapgear.c index 6e33ba4cd076..a931e5928f58 100644 --- a/arch/sh/drivers/pci/fixups-snapgear.c +++ b/arch/sh/drivers/pci/fixups-snapgear.c @@ -19,7 +19,7 @@ #include <linux/sh_intc.h> #include "pci-sh4.h" -int __init pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin) +int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin) { int irq = -1; diff --git a/arch/sh/drivers/pci/fixups-titan.c b/arch/sh/drivers/pci/fixups-titan.c index bd1addb1b8be..a9d563e479d5 100644 --- a/arch/sh/drivers/pci/fixups-titan.c +++ b/arch/sh/drivers/pci/fixups-titan.c @@ -19,7 +19,7 @@ #include <mach/titan.h> #include "pci-sh4.h" -static char titan_irq_tab[] __initdata = { +static char titan_irq_tab[] = { TITAN_IRQ_WAN, TITAN_IRQ_LAN, TITAN_IRQ_MPCIA, @@ -27,7 +27,7 @@ static char titan_irq_tab[] __initdata = { TITAN_IRQ_USB, }; -int __init pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin) +int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin) { int irq = titan_irq_tab[slot]; diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index c99ee286b69f..5976a2c8a3e3 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -39,8 +39,12 @@ static void pcibios_scanbus(struct pci_channel *hose) LIST_HEAD(resources); struct resource *res; resource_size_t offset; - int i; - struct pci_bus *bus; + int i, ret; + struct pci_host_bridge *bridge; + + bridge = pci_alloc_host_bridge(0); + if (!bridge) + return; for (i = 0; i < hose->nr_resources; i++) { res = hose->resources + i; @@ -52,19 +56,26 @@ static void pcibios_scanbus(struct pci_channel *hose) pci_add_resource_offset(&resources, res, offset); } - bus = pci_scan_root_bus(NULL, next_busno, hose->pci_ops, hose, - &resources); - hose->bus = bus; + list_splice_init(&resources, &bridge->windows); + bridge->dev.parent = NULL; + bridge->sysdata = hose; + bridge->busnr = next_busno; + bridge->ops = hose->pci_ops; + bridge->swizzle_irq = pci_common_swizzle; + bridge->map_irq = pcibios_map_platform_irq; + + ret = pci_scan_root_bus_bridge(bridge); + if (ret) { + pci_free_host_bridge(bridge); + return; + } + + hose->bus = bridge->bus; need_domain_info = need_domain_info || hose->index; hose->need_domain_info = need_domain_info; - if (!bus) { - pci_free_resource_list(&resources); - return; - } - - next_busno = bus->busn_res.end + 1; + next_busno = hose->bus->busn_res.end + 1; /* Don't allow 8-bit bus number overflow inside the hose - reserve some space for bridges. */ if (next_busno > 224) { @@ -72,9 +83,9 @@ static void pcibios_scanbus(struct pci_channel *hose) need_domain_info = 1; } - pci_bus_size_bridges(bus); - pci_bus_assign_resources(bus); - pci_bus_add_devices(bus); + pci_bus_size_bridges(hose->bus); + pci_bus_assign_resources(hose->bus); + pci_bus_add_devices(hose->bus); } /* @@ -144,8 +155,6 @@ static int __init pcibios_init(void) for (hose = hose_head; hose; hose = hose->next) pcibios_scanbus(hose); - pci_fixup_irqs(pci_common_swizzle, pcibios_map_platform_irq); - dma_debug_add_bus(&pci_bus_type); pci_initialized = 1; @@ -155,14 +164,6 @@ static int __init pcibios_init(void) subsys_initcall(pcibios_init); /* - * Called after each bus is probed, but before its children - * are examined. - */ -void pcibios_fixup_bus(struct pci_bus *bus) -{ -} - -/* * We need to avoid collisions with `mirrored' VGA ports * and other strange ISA hardware, so we always want the * addresses to be allocated in the 0x000-0x0ff region diff --git a/arch/sh/drivers/pci/pcie-sh7786.c b/arch/sh/drivers/pci/pcie-sh7786.c index a162a7f86b2e..0167a7352719 100644 --- a/arch/sh/drivers/pci/pcie-sh7786.c +++ b/arch/sh/drivers/pci/pcie-sh7786.c @@ -467,7 +467,7 @@ static int __init pcie_init(struct sh7786_pcie_port *port) return 0; } -int __init pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin) +int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin) { return evt2irq(0xae0); } diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c index 4371f72ff025..98c223edac84 100644 --- a/arch/sparc/kernel/leon_pci.c +++ b/arch/sparc/kernel/leon_pci.c @@ -25,6 +25,12 @@ void leon_pci_init(struct platform_device *ofdev, struct leon_pci_info *info) { LIST_HEAD(resources); struct pci_bus *root_bus; + struct pci_host_bridge *bridge; + int ret; + + bridge = pci_alloc_host_bridge(0); + if (!bridge) + return; pci_add_resource_offset(&resources, &info->io_space, info->io_space.start - 0x1000); @@ -32,15 +38,21 @@ void leon_pci_init(struct platform_device *ofdev, struct leon_pci_info *info) info->busn.flags = IORESOURCE_BUS; pci_add_resource(&resources, &info->busn); - root_bus = pci_scan_root_bus(&ofdev->dev, 0, info->ops, info, - &resources); - if (!root_bus) { - pci_free_resource_list(&resources); + list_splice_init(&resources, &bridge->windows); + bridge->dev.parent = &ofdev->dev; + bridge->sysdata = info; + bridge->busnr = 0; + bridge->ops = info->ops; + bridge->swizzle_irq = pci_common_swizzle; + bridge->map_irq = info->map_irq; + + ret = pci_scan_root_bus_bridge(bridge); + if (ret) { + pci_free_host_bridge(bridge); return; } - /* Setup IRQs of all devices using custom routines */ - pci_fixup_irqs(pci_common_swizzle, info->map_irq); + root_bus = bridge->bus; /* Assign devices with resources */ pci_assign_unassigned_resources(); @@ -94,9 +106,3 @@ void pcibios_fixup_bus(struct pci_bus *pbus) } } } - -resource_size_t pcibios_align_resource(void *data, const struct resource *res, - resource_size_t size, resource_size_t align) -{ - return res->start; -} diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 7eceaa10836f..3f8670c92951 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -690,16 +690,6 @@ struct pci_bus *pci_scan_one_pbm(struct pci_pbm_info *pbm, return bus; } -void pcibios_fixup_bus(struct pci_bus *pbus) -{ -} - -resource_size_t pcibios_align_resource(void *data, const struct resource *res, - resource_size_t size, resource_size_t align) -{ - return res->start; -} - int pcibios_enable_device(struct pci_dev *dev, int mask) { u16 cmd, oldcmd; diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c index 732af9a9f6dd..4a133c052af8 100644 --- a/arch/sparc/kernel/pcic.c +++ b/arch/sparc/kernel/pcic.c @@ -746,12 +746,6 @@ static void watchdog_reset() { } #endif -resource_size_t pcibios_align_resource(void *data, const struct resource *res, - resource_size_t size, resource_size_t align) -{ - return res->start; -} - int pcibios_enable_device(struct pci_dev *pdev, int mask) { return 0; diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c index bc6656b5708b..bbf81579b1f8 100644 --- a/arch/tile/kernel/pci.c +++ b/arch/tile/kernel/pci.c @@ -67,16 +67,6 @@ static struct pci_ops tile_cfg_ops; /* - * We don't need to worry about the alignment of resources. - */ -resource_size_t pcibios_align_resource(void *data, const struct resource *res, - resource_size_t size, resource_size_t align) -{ - return res->start; -} -EXPORT_SYMBOL(pcibios_align_resource); - -/* * Open a FD to the hypervisor PCI device. * * controller_id is the controller number, config type is 0 or 1 for @@ -274,6 +264,7 @@ static void fixup_read_and_payload_sizes(void) */ int __init pcibios_init(void) { + struct pci_host_bridge *bridge; int i; pr_info("PCI: Probing PCI hardware\n"); @@ -306,16 +297,26 @@ int __init pcibios_init(void) pci_add_resource(&resources, &ioport_resource); pci_add_resource(&resources, &iomem_resource); - bus = pci_scan_root_bus(NULL, 0, controller->ops, - controller, &resources); + + bridge = pci_alloc_host_bridge(0); + if (!bridge) + break; + + list_splice_init(&resources, &bridge->windows); + bridge->dev.parent = NULL; + bridge->sysdata = controller; + bridge->busnr = 0; + bridge->ops = controller->ops; + bridge->swizzle_irq = pci_common_swizzle; + bridge->map_irq = tile_map_irq; + + pci_scan_root_bus_bridge(bridge); + bus = bridge->bus; controller->root_bus = bus; controller->last_busno = bus->busn_res.end; } } - /* Do machine dependent PCI interrupt routing */ - pci_fixup_irqs(pci_common_swizzle, tile_map_irq); - /* * This comes from the generic Linux PCI driver. * @@ -369,14 +370,6 @@ int __init pcibios_init(void) } subsys_initcall(pcibios_init); -/* - * No bus fixups needed. - */ -void pcibios_fixup_bus(struct pci_bus *bus) -{ - /* Nothing needs to be done. */ -} - void pcibios_set_master(struct pci_dev *dev) { /* No special bus mastering setup handling. */ diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c index b554a68eea1b..9aa238ac7b35 100644 --- a/arch/tile/kernel/pci_gx.c +++ b/arch/tile/kernel/pci_gx.c @@ -108,15 +108,6 @@ static struct pci_ops tile_cfg_ops; /* Mask of CPUs that should receive PCIe interrupts. */ static struct cpumask intr_cpus_map; -/* We don't need to worry about the alignment of resources. */ -resource_size_t pcibios_align_resource(void *data, const struct resource *res, - resource_size_t size, - resource_size_t align) -{ - return res->start; -} -EXPORT_SYMBOL(pcibios_align_resource); - /* * Pick a CPU to receive and handle the PCIe interrupts, based on the IRQ #. * For now, we simply send interrupts to non-dataplane CPUs. @@ -669,6 +660,7 @@ int __init pcibios_init(void) resource_size_t offset; LIST_HEAD(resources); int next_busno; + struct pci_host_bridge *bridge; int i; tile_pci_init(); @@ -881,15 +873,25 @@ int __init pcibios_init(void) controller->mem_offset); pci_add_resource(&resources, &controller->io_space); controller->first_busno = next_busno; - bus = pci_scan_root_bus(NULL, next_busno, controller->ops, - controller, &resources); + + bridge = pci_alloc_host_bridge(0); + if (!bridge) + break; + + list_splice_init(&resources, &bridge->windows); + bridge->dev.parent = NULL; + bridge->sysdata = controller; + bridge->busnr = next_busno; + bridge->ops = controller->ops; + bridge->swizzle_irq = pci_common_swizzle; + bridge->map_irq = tile_map_irq; + + pci_scan_root_bus_bridge(bridge); + bus = bridge->bus; controller->root_bus = bus; next_busno = bus->busn_res.end + 1; } - /* Do machine dependent PCI interrupt routing */ - pci_fixup_irqs(pci_common_swizzle, tile_map_irq); - /* * This comes from the generic Linux PCI driver. * @@ -1038,11 +1040,6 @@ alloc_mem_map_failed: } subsys_initcall(pcibios_init); -/* No bus fixups needed. */ -void pcibios_fixup_bus(struct pci_bus *bus) -{ -} - /* Process any "pci=" kernel boot arguments. */ char *__init pcibios_setup(char *str) { diff --git a/arch/unicore32/kernel/pci.c b/arch/unicore32/kernel/pci.c index 1053bca1f8aa..9f26840e41b1 100644 --- a/arch/unicore32/kernel/pci.c +++ b/arch/unicore32/kernel/pci.c @@ -101,7 +101,7 @@ void pci_puv3_preinit(void) writel(readl(PCIBRI_CMD) | PCIBRI_CMD_IO | PCIBRI_CMD_MEM, PCIBRI_CMD); } -static int __init pci_puv3_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +static int pci_puv3_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { if (dev->bus->number == 0) { #ifdef CONFIG_ARCH_FPGA /* 4 pci slots */ @@ -252,19 +252,46 @@ void pcibios_fixup_bus(struct pci_bus *bus) } EXPORT_SYMBOL(pcibios_fixup_bus); +static struct resource busn_resource = { + .name = "PCI busn", + .start = 0, + .end = 255, + .flags = IORESOURCE_BUS, +}; + static int __init pci_common_init(void) { struct pci_bus *puv3_bus; + struct pci_host_bridge *bridge; + int ret; + + bridge = pci_alloc_host_bridge(0); + if (!bridge) + return -ENOMEM; pci_puv3_preinit(); - puv3_bus = pci_scan_bus(0, &pci_puv3_ops, NULL); + pci_add_resource(&bridge->windows, &ioport_resource); + pci_add_resource(&bridge->windows, &iomem_resource); + pci_add_resource(&bridge->windows, &busn_resource); + bridge->sysdata = NULL; + bridge->busnr = 0; + bridge->ops = &pci_puv3_ops; + bridge->swizzle_irq = pci_common_swizzle; + bridge->map_irq = pci_puv3_map_irq; + + /* Scan our single hose. */ + ret = pci_scan_root_bus_bridge(bridge); + if (ret) { + pci_free_host_bridge(bridge); + return; + } + + puv3_bus = bridge->bus; if (!puv3_bus) panic("PCI: unable to scan bus!"); - pci_fixup_irqs(pci_common_swizzle, pci_puv3_map_irq); - pci_bus_size_bridges(puv3_bus); pci_bus_assign_resources(puv3_bus); pci_bus_add_devices(puv3_bus); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 42bbbf0f173d..2519c6c801c9 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -288,6 +288,7 @@ #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ +#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index fde36f189836..fa2558e12024 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -219,8 +219,8 @@ struct x86_emulate_ops { struct x86_instruction_info *info, enum x86_intercept_stage stage); - void (*get_cpuid)(struct x86_emulate_ctxt *ctxt, - u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); + bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx, + u32 *ecx, u32 *edx, bool check_limit); void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 369e41c23f07..8844eee290b2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -79,15 +79,14 @@ | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) -#define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL #define CR3_PCID_INVD BIT_64(63) #define CR4_RESERVED_BITS \ (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \ | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \ - | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP \ - | X86_CR4_PKE)) + | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \ + | X86_CR4_SMAP | X86_CR4_PKE)) #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) @@ -204,7 +203,6 @@ enum { #define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT) #define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \ - PFERR_USER_MASK | \ PFERR_WRITE_MASK | \ PFERR_PRESENT_MASK) @@ -317,15 +315,17 @@ struct kvm_pio_request { int size; }; +#define PT64_ROOT_MAX_LEVEL 5 + struct rsvd_bits_validate { - u64 rsvd_bits_mask[2][4]; + u64 rsvd_bits_mask[2][PT64_ROOT_MAX_LEVEL]; u64 bad_mt_xwr; }; /* - * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level - * 32-bit). The kvm_mmu structure abstracts the details of the current mmu - * mode. + * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit, + * and 2-level 32-bit). The kvm_mmu structure abstracts the details of the + * current mmu mode. */ struct kvm_mmu { void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); @@ -548,8 +548,8 @@ struct kvm_vcpu_arch { struct kvm_queued_exception { bool pending; + bool injected; bool has_error_code; - bool reinject; u8 nr; u32 error_code; u8 nested_apf; @@ -687,8 +687,12 @@ struct kvm_vcpu_arch { int pending_ioapic_eoi; int pending_external_vector; - /* GPA available (AMD only) */ + /* GPA available */ bool gpa_available; + gpa_t gpa_val; + + /* be preempted when it's in kernel-mode(cpl=0) */ + bool preempted_in_kernel; }; struct kvm_lpage_info { @@ -979,7 +983,7 @@ struct kvm_x86_ops { void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); - int (*get_tdp_level)(void); + int (*get_tdp_level)(struct kvm_vcpu *vcpu); u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); int (*get_lpage_level)(void); bool (*rdtscp_supported)(void); @@ -1297,20 +1301,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); } -static inline u64 get_canonical(u64 la) -{ - return ((int64_t)la << 16) >> 16; -} - -static inline bool is_noncanonical_address(u64 la) -{ -#ifdef CONFIG_X86_64 - return get_canonical(la) != la; -#else - return false; -#endif -} - #define TSS_IOPB_BASE_OFFSET 0x66 #define TSS_BASE_SIZE 0x68 #define TSS_IOPB_SIZE (65536 / 8) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 58fffe79e417..14835dd205a5 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -107,6 +107,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define V_IRQ_SHIFT 8 #define V_IRQ_MASK (1 << V_IRQ_SHIFT) +#define V_GIF_SHIFT 9 +#define V_GIF_MASK (1 << V_GIF_SHIFT) + #define V_INTR_PRIO_SHIFT 16 #define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT) @@ -116,6 +119,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define V_INTR_MASKING_SHIFT 24 #define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) +#define V_GIF_ENABLE_SHIFT 25 +#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT) + #define AVIC_ENABLE_SHIFT 31 #define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 35cd06f636ab..caec8417539f 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -72,6 +72,7 @@ #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 #define SECONDARY_EXEC_RDRAND 0x00000800 #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 +#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 #define SECONDARY_EXEC_RDSEED 0x00010000 #define SECONDARY_EXEC_ENABLE_PML 0x00020000 @@ -114,6 +115,10 @@ #define VMX_MISC_SAVE_EFER_LMA 0x00000020 #define VMX_MISC_ACTIVITY_HLT 0x00000040 +/* VMFUNC functions */ +#define VMX_VMFUNC_EPTP_SWITCHING 0x00000001 +#define VMFUNC_EPTP_ENTRIES 512 + static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic) { return vmx_basic & GENMASK_ULL(30, 0); @@ -187,6 +192,8 @@ enum vmcs_field { APIC_ACCESS_ADDR_HIGH = 0x00002015, POSTED_INTR_DESC_ADDR = 0x00002016, POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, + VM_FUNCTION_CONTROL = 0x00002018, + VM_FUNCTION_CONTROL_HIGH = 0x00002019, EPT_POINTER = 0x0000201a, EPT_POINTER_HIGH = 0x0000201b, EOI_EXIT_BITMAP0 = 0x0000201c, @@ -197,6 +204,8 @@ enum vmcs_field { EOI_EXIT_BITMAP2_HIGH = 0x00002021, EOI_EXIT_BITMAP3 = 0x00002022, EOI_EXIT_BITMAP3_HIGH = 0x00002023, + EPTP_LIST_ADDRESS = 0x00002024, + EPTP_LIST_ADDRESS_HIGH = 0x00002025, VMREAD_BITMAP = 0x00002026, VMWRITE_BITMAP = 0x00002028, XSS_EXIT_BITMAP = 0x0000202C, @@ -444,6 +453,7 @@ enum vmcs_field { #define VMX_EPT_EXECUTE_ONLY_BIT (1ull) #define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6) +#define VMX_EPT_PAGE_WALK_5_BIT (1ull << 7) #define VMX_EPTP_UC_BIT (1ull << 8) #define VMX_EPTP_WB_BIT (1ull << 14) #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) @@ -459,12 +469,14 @@ enum vmcs_field { #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */ #define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT (1ull << 11) /* (43 - 32) */ -#define VMX_EPT_DEFAULT_GAW 3 -#define VMX_EPT_MAX_GAW 0x4 #define VMX_EPT_MT_EPTE_SHIFT 3 -#define VMX_EPT_GAW_EPTP_SHIFT 3 -#define VMX_EPT_AD_ENABLE_BIT (1ull << 6) -#define VMX_EPT_DEFAULT_MT 0x6ull +#define VMX_EPTP_PWL_MASK 0x38ull +#define VMX_EPTP_PWL_4 0x18ull +#define VMX_EPTP_PWL_5 0x20ull +#define VMX_EPTP_AD_ENABLE_BIT (1ull << 6) +#define VMX_EPTP_MT_MASK 0x7ull +#define VMX_EPTP_MT_WB 0x6ull +#define VMX_EPTP_MT_UC 0x0ull #define VMX_EPT_READABLE_MASK 0x1ull #define VMX_EPT_WRITABLE_MASK 0x2ull #define VMX_EPT_EXECUTABLE_MASK 0x4ull diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 19adbb418443..0099e10eb045 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -126,16 +126,20 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); /* - * The existing code assumes virtual address is 48-bit in the canonical - * address checks; exit if it is ever changed. + * The existing code assumes virtual address is 48-bit or 57-bit in the + * canonical address checks; exit if it is ever changed. */ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); - if (best && ((best->eax & 0xff00) >> 8) != 48 && - ((best->eax & 0xff00) >> 8) != 0) - return -EINVAL; + if (best) { + int vaddr_bits = (best->eax & 0xff00) >> 8; + + if (vaddr_bits != 48 && vaddr_bits != 57 && vaddr_bits != 0) + return -EINVAL; + } /* Update physical-address width */ vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); + kvm_mmu_reset_context(vcpu); kvm_pmu_refresh(vcpu); return 0; @@ -383,7 +387,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.ecx*/ const u32 kvm_cpuid_7_0_ecx_x86_features = - F(AVX512VBMI) | F(PKU) | 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ); + F(AVX512VBMI) | F(LA57) | F(PKU) | + 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ); /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = @@ -853,16 +858,24 @@ static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu, return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); } -void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) +bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, + u32 *ecx, u32 *edx, bool check_limit) { u32 function = *eax, index = *ecx; struct kvm_cpuid_entry2 *best; + bool entry_found = true; best = kvm_find_cpuid_entry(vcpu, function, index); - if (!best) + if (!best) { + entry_found = false; + if (!check_limit) + goto out; + best = check_cpuid_limit(vcpu, function, index); + } +out: if (best) { *eax = best->eax; *ebx = best->ebx; @@ -870,7 +883,8 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) *edx = best->edx; } else *eax = *ebx = *ecx = *edx = 0; - trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx); + trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, entry_found); + return entry_found; } EXPORT_SYMBOL_GPL(kvm_cpuid); @@ -883,7 +897,7 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu) eax = kvm_register_read(vcpu, VCPU_REGS_RAX); ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); - kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx); + kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx, true); kvm_register_write(vcpu, VCPU_REGS_RAX, eax); kvm_register_write(vcpu, VCPU_REGS_RBX, ebx); kvm_register_write(vcpu, VCPU_REGS_RCX, ecx); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index da6728383052..1ea3c0e1e3a9 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -3,6 +3,7 @@ #include "x86.h" #include <asm/cpu.h> +#include <asm/processor.h> int kvm_update_cpuid(struct kvm_vcpu *vcpu); bool kvm_mpx_supported(void); @@ -20,7 +21,8 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries); -void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); +bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, + u32 *ecx, u32 *edx, bool check_limit); int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu); @@ -29,95 +31,87 @@ static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) return vcpu->arch.maxphyaddr; } -static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - if (!static_cpu_has(X86_FEATURE_XSAVE)) - return false; - - best = kvm_find_cpuid_entry(vcpu, 1, 0); - return best && (best->ecx & bit(X86_FEATURE_XSAVE)); -} - -static inline bool guest_cpuid_has_mtrr(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 1, 0); - return best && (best->edx & bit(X86_FEATURE_MTRR)); -} - -static inline bool guest_cpuid_has_tsc_adjust(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_TSC_ADJUST)); -} +struct cpuid_reg { + u32 function; + u32 index; + int reg; +}; -static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_SMEP)); -} +static const struct cpuid_reg reverse_cpuid[] = { + [CPUID_1_EDX] = { 1, 0, CPUID_EDX}, + [CPUID_8000_0001_EDX] = {0x80000001, 0, CPUID_EDX}, + [CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX}, + [CPUID_1_ECX] = { 1, 0, CPUID_ECX}, + [CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX}, + [CPUID_8000_0001_ECX] = {0xc0000001, 0, CPUID_ECX}, + [CPUID_7_0_EBX] = { 7, 0, CPUID_EBX}, + [CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX}, + [CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX}, + [CPUID_F_1_EDX] = { 0xf, 1, CPUID_EDX}, + [CPUID_8000_0008_EBX] = {0x80000008, 0, CPUID_EBX}, + [CPUID_6_EAX] = { 6, 0, CPUID_EAX}, + [CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX}, + [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, + [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX}, +}; -static inline bool guest_cpuid_has_smap(struct kvm_vcpu *vcpu) +static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature) { - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_SMAP)); -} + unsigned x86_leaf = x86_feature / 32; -static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; + BUILD_BUG_ON(!__builtin_constant_p(x86_leaf)); + BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid)); + BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0); - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_FSGSBASE)); + return reverse_cpuid[x86_leaf]; } -static inline bool guest_cpuid_has_pku(struct kvm_vcpu *vcpu) +static __always_inline int *guest_cpuid_get_register(struct kvm_vcpu *vcpu, unsigned x86_feature) { - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ecx & bit(X86_FEATURE_PKU)); -} + struct kvm_cpuid_entry2 *entry; + const struct cpuid_reg cpuid = x86_feature_cpuid(x86_feature); -static inline bool guest_cpuid_has_longmode(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; + entry = kvm_find_cpuid_entry(vcpu, cpuid.function, cpuid.index); + if (!entry) + return NULL; - best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); - return best && (best->edx & bit(X86_FEATURE_LM)); + switch (cpuid.reg) { + case CPUID_EAX: + return &entry->eax; + case CPUID_EBX: + return &entry->ebx; + case CPUID_ECX: + return &entry->ecx; + case CPUID_EDX: + return &entry->edx; + default: + BUILD_BUG(); + return NULL; + } } -static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu) +static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu, unsigned x86_feature) { - struct kvm_cpuid_entry2 *best; + int *reg; - best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); - return best && (best->ecx & bit(X86_FEATURE_OSVW)); -} + if (x86_feature == X86_FEATURE_XSAVE && + !static_cpu_has(X86_FEATURE_XSAVE)) + return false; -static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; + reg = guest_cpuid_get_register(vcpu, x86_feature); + if (!reg) + return false; - best = kvm_find_cpuid_entry(vcpu, 1, 0); - return best && (best->ecx & bit(X86_FEATURE_PCID)); + return *reg & bit(x86_feature); } -static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu) +static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu, unsigned x86_feature) { - struct kvm_cpuid_entry2 *best; + int *reg; - best = kvm_find_cpuid_entry(vcpu, 1, 0); - return best && (best->ecx & bit(X86_FEATURE_X2APIC)); + reg = guest_cpuid_get_register(vcpu, x86_feature); + if (reg) + *reg &= ~bit(x86_feature); } static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu) @@ -128,58 +122,6 @@ static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu) return best && best->ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx; } -static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); - return best && (best->edx & bit(X86_FEATURE_GBPAGES)); -} - -static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_RTM)); -} - -static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_MPX)); -} - -static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); - return best && (best->edx & bit(X86_FEATURE_RDTSCP)); -} - -/* - * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 - */ -#define BIT_NRIPS 3 - -static inline bool guest_cpuid_has_nrips(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 0x8000000a, 0); - - /* - * NRIPS is a scattered cpuid feature, so we can't use - * X86_FEATURE_NRIPS here (X86_FEATURE_NRIPS would be bit - * position 8, not 3). - */ - return best && (best->edx & bit(BIT_NRIPS)); -} -#undef BIT_NRIPS - static inline int guest_cpuid_family(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index fb0055953fbc..16bf6655aa85 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -28,6 +28,7 @@ #include "x86.h" #include "tss.h" +#include "mmu.h" /* * Operand types @@ -688,16 +689,18 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, ulong la; u32 lim; u16 sel; + u8 va_bits; la = seg_base(ctxt, addr.seg) + addr.ea; *max_size = 0; switch (mode) { case X86EMUL_MODE_PROT64: *linear = la; - if (is_noncanonical_address(la)) + va_bits = ctxt_virt_addr_bits(ctxt); + if (get_canonical(la, va_bits) != la) goto bad; - *max_size = min_t(u64, ~0u, (1ull << 48) - la); + *max_size = min_t(u64, ~0u, (1ull << va_bits) - la); if (size > *max_size) goto bad; break; @@ -1748,8 +1751,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, sizeof(base3), &ctxt->exception); if (ret != X86EMUL_CONTINUE) return ret; - if (is_noncanonical_address(get_desc_base(&seg_desc) | - ((u64)base3 << 32))) + if (emul_is_noncanonical_address(get_desc_base(&seg_desc) | + ((u64)base3 << 32), ctxt)) return emulate_gp(ctxt, 0); } load: @@ -2333,7 +2336,7 @@ static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt) eax = 0x80000001; ecx = 0; - ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); return edx & bit(X86_FEATURE_LM); } @@ -2636,7 +2639,7 @@ static bool vendor_intel(struct x86_emulate_ctxt *ctxt) u32 eax, ebx, ecx, edx; eax = ecx = 0; - ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); return ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx; @@ -2656,7 +2659,7 @@ static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt) eax = 0x00000000; ecx = 0x00000000; - ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); + ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); /* * Intel ("GenuineIntel") * remark: Intel CPUs only support "syscall" in 64bit @@ -2840,8 +2843,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) ss_sel = cs_sel + 8; cs.d = 0; cs.l = 1; - if (is_noncanonical_address(rcx) || - is_noncanonical_address(rdx)) + if (emul_is_noncanonical_address(rcx, ctxt) || + emul_is_noncanonical_address(rdx, ctxt)) return emulate_gp(ctxt, 0); break; } @@ -3551,7 +3554,7 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt) /* * Check MOVBE is set in the guest-visible CPUID leaf. */ - ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); if (!(ecx & FFL(MOVBE))) return emulate_ud(ctxt); @@ -3756,7 +3759,7 @@ static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt) if (rc != X86EMUL_CONTINUE) return rc; if (ctxt->mode == X86EMUL_MODE_PROT64 && - is_noncanonical_address(desc_ptr.address)) + emul_is_noncanonical_address(desc_ptr.address, ctxt)) return emulate_gp(ctxt, 0); if (lgdt) ctxt->ops->set_gdt(ctxt, &desc_ptr); @@ -3865,7 +3868,7 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt) eax = reg_read(ctxt, VCPU_REGS_RAX); ecx = reg_read(ctxt, VCPU_REGS_RCX); - ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true); *reg_write(ctxt, VCPU_REGS_RAX) = eax; *reg_write(ctxt, VCPU_REGS_RBX) = ebx; *reg_write(ctxt, VCPU_REGS_RCX) = ecx; @@ -3924,7 +3927,7 @@ static int check_fxsr(struct x86_emulate_ctxt *ctxt) { u32 eax = 1, ebx, ecx = 0, edx; - ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); if (!(edx & FFL(FXSR))) return emulate_ud(ctxt); @@ -4097,8 +4100,17 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) u64 rsvd = 0; ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); - if (efer & EFER_LMA) - rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD; + if (efer & EFER_LMA) { + u64 maxphyaddr; + u32 eax = 0x80000008; + + if (ctxt->ops->get_cpuid(ctxt, &eax, NULL, NULL, + NULL, false)) + maxphyaddr = eax & 0xff; + else + maxphyaddr = 36; + rsvd = rsvd_bits(maxphyaddr, 62); + } if (new_val & rsvd) return emulate_gp(ctxt, 0); diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 337b6d2730fa..dc97f2544b6f 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1160,6 +1160,12 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) return stimer_get_count(vcpu_to_stimer(vcpu, timer_index), pdata); } + case HV_X64_MSR_TSC_FREQUENCY: + data = (u64)vcpu->arch.virtual_tsc_khz * 1000; + break; + case HV_X64_MSR_APIC_FREQUENCY: + data = APIC_BUS_FREQUENCY; + break; default: vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; @@ -1268,7 +1274,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) switch (code) { case HVCALL_NOTIFY_LONG_SPIN_WAIT: - kvm_vcpu_on_spin(vcpu); + kvm_vcpu_on_spin(vcpu, true); break; case HVCALL_POST_MESSAGE: case HVCALL_SIGNAL_EVENT: diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index e1e89ee4af75..9add410f195f 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -4,7 +4,7 @@ #define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS #define KVM_POSSIBLE_CR4_GUEST_BITS \ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ - | X86_CR4_OSXMMEXCPT | X86_CR4_PGE) + | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE) static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, enum kvm_reg reg) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 589dcc117086..aaf10b6f5380 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -54,8 +54,6 @@ #define PRIu64 "u" #define PRIo64 "o" -#define APIC_BUS_CYCLE_NS 1 - /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ #define apic_debug(fmt, arg...) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 29caa2c3dff9..215721e1426a 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -12,6 +12,9 @@ #define KVM_APIC_SHORT_MASK 0xc0000 #define KVM_APIC_DEST_MASK 0x800 +#define APIC_BUS_CYCLE_NS 1 +#define APIC_BUS_FREQUENCY (1000000000ULL / APIC_BUS_CYCLE_NS) + struct kvm_timer { struct hrtimer timer; s64 period; /* unit: ns */ diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 04d750813c9d..eca30c1eb1d9 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2169,8 +2169,8 @@ static bool kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, } struct mmu_page_path { - struct kvm_mmu_page *parent[PT64_ROOT_LEVEL]; - unsigned int idx[PT64_ROOT_LEVEL]; + struct kvm_mmu_page *parent[PT64_ROOT_MAX_LEVEL]; + unsigned int idx[PT64_ROOT_MAX_LEVEL]; }; #define for_each_sp(pvec, sp, parents, i) \ @@ -2385,8 +2385,8 @@ static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator, iterator->shadow_addr = vcpu->arch.mmu.root_hpa; iterator->level = vcpu->arch.mmu.shadow_root_level; - if (iterator->level == PT64_ROOT_LEVEL && - vcpu->arch.mmu.root_level < PT64_ROOT_LEVEL && + if (iterator->level == PT64_ROOT_4LEVEL && + vcpu->arch.mmu.root_level < PT64_ROOT_4LEVEL && !vcpu->arch.mmu.direct_map) --iterator->level; @@ -2610,9 +2610,7 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm, sp = list_last_entry(&kvm->arch.active_mmu_pages, struct kvm_mmu_page, link); - kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); - - return true; + return kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); } /* @@ -3262,7 +3260,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable); -static void make_mmu_pages_available(struct kvm_vcpu *vcpu); +static int make_mmu_pages_available(struct kvm_vcpu *vcpu); static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, gfn_t gfn, bool prefault) @@ -3302,7 +3300,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; - make_mmu_pages_available(vcpu); + if (make_mmu_pages_available(vcpu) < 0) + goto out_unlock; if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); @@ -3326,8 +3325,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) return; - if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL && - (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL || + if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL && + (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL || vcpu->arch.mmu.direct_map)) { hpa_t root = vcpu->arch.mmu.root_hpa; @@ -3379,10 +3378,14 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) struct kvm_mmu_page *sp; unsigned i; - if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { + if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL) { spin_lock(&vcpu->kvm->mmu_lock); - make_mmu_pages_available(vcpu); - sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, 1, ACC_ALL); + if(make_mmu_pages_available(vcpu) < 0) { + spin_unlock(&vcpu->kvm->mmu_lock); + return 1; + } + sp = kvm_mmu_get_page(vcpu, 0, 0, + vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); vcpu->arch.mmu.root_hpa = __pa(sp->spt); @@ -3392,7 +3395,10 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) MMU_WARN_ON(VALID_PAGE(root)); spin_lock(&vcpu->kvm->mmu_lock); - make_mmu_pages_available(vcpu); + if (make_mmu_pages_available(vcpu) < 0) { + spin_unlock(&vcpu->kvm->mmu_lock); + return 1; + } sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL); root = __pa(sp->spt); @@ -3423,15 +3429,18 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) * Do we shadow a long mode page table? If so we need to * write-protect the guests page table root. */ - if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { + if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) { hpa_t root = vcpu->arch.mmu.root_hpa; MMU_WARN_ON(VALID_PAGE(root)); spin_lock(&vcpu->kvm->mmu_lock); - make_mmu_pages_available(vcpu); - sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, - 0, ACC_ALL); + if (make_mmu_pages_available(vcpu) < 0) { + spin_unlock(&vcpu->kvm->mmu_lock); + return 1; + } + sp = kvm_mmu_get_page(vcpu, root_gfn, 0, + vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL); root = __pa(sp->spt); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); @@ -3445,7 +3454,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) * the shadow page table may be a PAE or a long mode page table. */ pm_mask = PT_PRESENT_MASK; - if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) + if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL) pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; for (i = 0; i < 4; ++i) { @@ -3463,7 +3472,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) return 1; } spin_lock(&vcpu->kvm->mmu_lock); - make_mmu_pages_available(vcpu); + if (make_mmu_pages_available(vcpu) < 0) { + spin_unlock(&vcpu->kvm->mmu_lock); + return 1; + } sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, 0, ACC_ALL); root = __pa(sp->spt); @@ -3478,7 +3490,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) * If we shadow a 32 bit page table with a long mode page * table we enter this path. */ - if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { + if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL) { if (vcpu->arch.mmu.lm_root == NULL) { /* * The additional page necessary for this is only @@ -3523,7 +3535,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); - if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { + if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) { hpa_t root = vcpu->arch.mmu.root_hpa; sp = page_header(root); mmu_sync_children(vcpu, sp); @@ -3588,6 +3600,13 @@ static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level) static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) { + /* + * A nested guest cannot use the MMIO cache if it is using nested + * page tables, because cr2 is a nGPA while the cache stores GPAs. + */ + if (mmu_is_nested(vcpu)) + return false; + if (direct) return vcpu_match_mmio_gpa(vcpu, addr); @@ -3599,7 +3618,7 @@ static bool walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { struct kvm_shadow_walk_iterator iterator; - u64 sptes[PT64_ROOT_LEVEL], spte = 0ull; + u64 sptes[PT64_ROOT_MAX_LEVEL], spte = 0ull; int root, leaf; bool reserved = false; @@ -3640,7 +3659,23 @@ exit: return reserved; } -int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) +/* + * Return values of handle_mmio_page_fault: + * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction + * directly. + * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page + * fault path update the mmio spte. + * RET_MMIO_PF_RETRY: let CPU fault again on the address. + * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed). + */ +enum { + RET_MMIO_PF_EMULATE = 1, + RET_MMIO_PF_INVALID = 2, + RET_MMIO_PF_RETRY = 0, + RET_MMIO_PF_BUG = -1 +}; + +static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) { u64 spte; bool reserved; @@ -3872,7 +3907,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; - make_mmu_pages_available(vcpu); + if (make_mmu_pages_available(vcpu) < 0) + goto out_unlock; if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); @@ -4025,7 +4061,13 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0]; break; - case PT64_ROOT_LEVEL: + case PT64_ROOT_5LEVEL: + rsvd_check->rsvd_bits_mask[0][4] = exb_bit_rsvd | + nonleaf_bit8_rsvd | rsvd_bits(7, 7) | + rsvd_bits(maxphyaddr, 51); + rsvd_check->rsvd_bits_mask[1][4] = + rsvd_check->rsvd_bits_mask[0][4]; + case PT64_ROOT_4LEVEL: rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd | nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51); @@ -4055,7 +4097,8 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, { __reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check, cpuid_maxphyaddr(vcpu), context->root_level, - context->nx, guest_cpuid_has_gbpages(vcpu), + context->nx, + guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES), is_pse(vcpu), guest_cpuid_is_amd(vcpu)); } @@ -4065,6 +4108,8 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, { u64 bad_mt_xwr; + rsvd_check->rsvd_bits_mask[0][4] = + rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); rsvd_check->rsvd_bits_mask[0][3] = rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); rsvd_check->rsvd_bits_mask[0][2] = @@ -4074,6 +4119,7 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51); /* large page */ + rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4]; rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3]; rsvd_check->rsvd_bits_mask[1][2] = rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29); @@ -4120,8 +4166,8 @@ reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) __reset_rsvds_bits_mask(vcpu, shadow_zero_check, boot_cpu_data.x86_phys_bits, context->shadow_root_level, uses_nx, - guest_cpuid_has_gbpages(vcpu), is_pse(vcpu), - true); + guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES), + is_pse(vcpu), true); if (!shadow_me_mask) return; @@ -4185,66 +4231,85 @@ reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, boot_cpu_data.x86_phys_bits, execonly); } +#define BYTE_MASK(access) \ + ((1 & (access) ? 2 : 0) | \ + (2 & (access) ? 4 : 0) | \ + (3 & (access) ? 8 : 0) | \ + (4 & (access) ? 16 : 0) | \ + (5 & (access) ? 32 : 0) | \ + (6 & (access) ? 64 : 0) | \ + (7 & (access) ? 128 : 0)) + + static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, bool ept) { - unsigned bit, byte, pfec; - u8 map; - bool fault, x, w, u, wf, uf, ff, smapf, cr4_smap, cr4_smep, smap = 0; + unsigned byte; + + const u8 x = BYTE_MASK(ACC_EXEC_MASK); + const u8 w = BYTE_MASK(ACC_WRITE_MASK); + const u8 u = BYTE_MASK(ACC_USER_MASK); + + bool cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP) != 0; + bool cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP) != 0; + bool cr0_wp = is_write_protection(vcpu); - cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); - cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) { - pfec = byte << 1; - map = 0; - wf = pfec & PFERR_WRITE_MASK; - uf = pfec & PFERR_USER_MASK; - ff = pfec & PFERR_FETCH_MASK; + unsigned pfec = byte << 1; + /* - * PFERR_RSVD_MASK bit is set in PFEC if the access is not - * subject to SMAP restrictions, and cleared otherwise. The - * bit is only meaningful if the SMAP bit is set in CR4. + * Each "*f" variable has a 1 bit for each UWX value + * that causes a fault with the given PFEC. */ - smapf = !(pfec & PFERR_RSVD_MASK); - for (bit = 0; bit < 8; ++bit) { - x = bit & ACC_EXEC_MASK; - w = bit & ACC_WRITE_MASK; - u = bit & ACC_USER_MASK; - - if (!ept) { - /* Not really needed: !nx will cause pte.nx to fault */ - x |= !mmu->nx; - /* Allow supervisor writes if !cr0.wp */ - w |= !is_write_protection(vcpu) && !uf; - /* Disallow supervisor fetches of user code if cr4.smep */ - x &= !(cr4_smep && u && !uf); - - /* - * SMAP:kernel-mode data accesses from user-mode - * mappings should fault. A fault is considered - * as a SMAP violation if all of the following - * conditions are ture: - * - X86_CR4_SMAP is set in CR4 - * - A user page is accessed - * - Page fault in kernel mode - * - if CPL = 3 or X86_EFLAGS_AC is clear - * - * Here, we cover the first three conditions. - * The fourth is computed dynamically in - * permission_fault() and is in smapf. - * - * Also, SMAP does not affect instruction - * fetches, add the !ff check here to make it - * clearer. - */ - smap = cr4_smap && u && !uf && !ff; - } - fault = (ff && !x) || (uf && !u) || (wf && !w) || - (smapf && smap); - map |= fault << bit; + /* Faults from writes to non-writable pages */ + u8 wf = (pfec & PFERR_WRITE_MASK) ? ~w : 0; + /* Faults from user mode accesses to supervisor pages */ + u8 uf = (pfec & PFERR_USER_MASK) ? ~u : 0; + /* Faults from fetches of non-executable pages*/ + u8 ff = (pfec & PFERR_FETCH_MASK) ? ~x : 0; + /* Faults from kernel mode fetches of user pages */ + u8 smepf = 0; + /* Faults from kernel mode accesses of user pages */ + u8 smapf = 0; + + if (!ept) { + /* Faults from kernel mode accesses to user pages */ + u8 kf = (pfec & PFERR_USER_MASK) ? 0 : u; + + /* Not really needed: !nx will cause pte.nx to fault */ + if (!mmu->nx) + ff = 0; + + /* Allow supervisor writes if !cr0.wp */ + if (!cr0_wp) + wf = (pfec & PFERR_USER_MASK) ? wf : 0; + + /* Disallow supervisor fetches of user code if cr4.smep */ + if (cr4_smep) + smepf = (pfec & PFERR_FETCH_MASK) ? kf : 0; + + /* + * SMAP:kernel-mode data accesses from user-mode + * mappings should fault. A fault is considered + * as a SMAP violation if all of the following + * conditions are ture: + * - X86_CR4_SMAP is set in CR4 + * - A user page is accessed + * - The access is not a fetch + * - Page fault in kernel mode + * - if CPL = 3 or X86_EFLAGS_AC is clear + * + * Here, we cover the first three conditions. + * The fourth is computed dynamically in permission_fault(); + * PFERR_RSVD_MASK bit will be set in PFEC if the access is + * *not* subject to SMAP restrictions. + */ + if (cr4_smap) + smapf = (pfec & (PFERR_RSVD_MASK|PFERR_FETCH_MASK)) ? 0 : kf; } - mmu->permissions[byte] = map; + + mmu->permissions[byte] = ff | uf | wf | smepf | smapf; } } @@ -4358,7 +4423,10 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu, static void paging64_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { - paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL); + int root_level = is_la57_mode(vcpu) ? + PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL; + + paging64_init_context_common(vcpu, context, root_level); } static void paging32_init_context(struct kvm_vcpu *vcpu, @@ -4399,7 +4467,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context->sync_page = nonpaging_sync_page; context->invlpg = nonpaging_invlpg; context->update_pte = nonpaging_update_pte; - context->shadow_root_level = kvm_x86_ops->get_tdp_level(); + context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu); context->root_hpa = INVALID_PAGE; context->direct_map = true; context->set_cr3 = kvm_x86_ops->set_tdp_cr3; @@ -4413,7 +4481,8 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context->root_level = 0; } else if (is_long_mode(vcpu)) { context->nx = is_nx(vcpu); - context->root_level = PT64_ROOT_LEVEL; + context->root_level = is_la57_mode(vcpu) ? + PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL; reset_rsvds_bits_mask(vcpu, context); context->gva_to_gpa = paging64_gva_to_gpa; } else if (is_pae(vcpu)) { @@ -4470,7 +4539,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, MMU_WARN_ON(VALID_PAGE(context->root_hpa)); - context->shadow_root_level = kvm_x86_ops->get_tdp_level(); + context->shadow_root_level = PT64_ROOT_4LEVEL; context->nx = true; context->ept_ad = accessed_dirty; @@ -4479,7 +4548,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, context->sync_page = ept_sync_page; context->invlpg = ept_invlpg; context->update_pte = ept_update_pte; - context->root_level = context->shadow_root_level; + context->root_level = PT64_ROOT_4LEVEL; context->root_hpa = INVALID_PAGE; context->direct_map = false; context->base_role.ad_disabled = !accessed_dirty; @@ -4524,7 +4593,8 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested; } else if (is_long_mode(vcpu)) { g_context->nx = is_nx(vcpu); - g_context->root_level = PT64_ROOT_LEVEL; + g_context->root_level = is_la57_mode(vcpu) ? + PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL; reset_rsvds_bits_mask(vcpu, g_context); g_context->gva_to_gpa = paging64_gva_to_gpa_nested; } else if (is_pae(vcpu)) { @@ -4814,12 +4884,12 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) } EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); -static void make_mmu_pages_available(struct kvm_vcpu *vcpu) +static int make_mmu_pages_available(struct kvm_vcpu *vcpu) { LIST_HEAD(invalid_list); if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES)) - return; + return 0; while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) { if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list)) @@ -4828,6 +4898,10 @@ static void make_mmu_pages_available(struct kvm_vcpu *vcpu) ++vcpu->kvm->stat.mmu_recycled; } kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); + + if (!kvm_mmu_available_pages(vcpu->kvm)) + return -ENOSPC; + return 0; } int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, @@ -4835,7 +4909,13 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, { int r, emulation_type = EMULTYPE_RETRY; enum emulation_result er; - bool direct = vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu); + bool direct = vcpu->arch.mmu.direct_map; + + /* With shadow page tables, fault_address contains a GVA or nGPA. */ + if (vcpu->arch.mmu.direct_map) { + vcpu->arch.gpa_available = true; + vcpu->arch.gpa_val = cr2; + } if (unlikely(error_code & PFERR_RSVD_MASK)) { r = handle_mmio_page_fault(vcpu, cr2, direct); @@ -4847,6 +4927,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, return 1; if (r < 0) return r; + /* Must be RET_MMIO_PF_INVALID. */ } r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code), @@ -4862,11 +4943,9 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, * This can occur when using nested virtualization with nested * paging in both guests. If true, we simply unprotect the page * and resume the guest. - * - * Note: AMD only (since it supports the PFERR_GUEST_PAGE_MASK used - * in PFERR_NEXT_GUEST_PAGE) */ - if (error_code == PFERR_NESTED_GUEST_PAGE) { + if (vcpu->arch.mmu.direct_map && + (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) { kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2)); return 1; } diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 4b9a3ae6b725..64a2dbd2b1af 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -37,7 +37,8 @@ #define PT32_DIR_PSE36_MASK \ (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT) -#define PT64_ROOT_LEVEL 4 +#define PT64_ROOT_5LEVEL 5 +#define PT64_ROOT_4LEVEL 4 #define PT32_ROOT_LEVEL 2 #define PT32E_ROOT_LEVEL 3 @@ -48,6 +49,9 @@ static inline u64 rsvd_bits(int s, int e) { + if (e < s) + return 0; + return ((1ULL << (e - s + 1)) - 1) << s; } @@ -56,23 +60,6 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value); void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context); -/* - * Return values of handle_mmio_page_fault: - * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction - * directly. - * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page - * fault path update the mmio spte. - * RET_MMIO_PF_RETRY: let CPU fault again on the address. - * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed). - */ -enum { - RET_MMIO_PF_EMULATE = 1, - RET_MMIO_PF_INVALID = 2, - RET_MMIO_PF_RETRY = 0, - RET_MMIO_PF_BUG = -1 -}; - -int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct); void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, bool accessed_dirty); diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index dcce533d420c..d22ddbdf5e6e 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -62,11 +62,11 @@ static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn) if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) return; - if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { + if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) { hpa_t root = vcpu->arch.mmu.root_hpa; sp = page_header(root); - __mmu_spte_walk(vcpu, sp, fn, PT64_ROOT_LEVEL); + __mmu_spte_walk(vcpu, sp, fn, vcpu->arch.mmu.root_level); return; } diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index 0149ac59c273..e9ea2d45ae66 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -130,7 +130,7 @@ static u8 mtrr_disabled_type(struct kvm_vcpu *vcpu) * enable MTRRs and it is obviously undesirable to run the * guest entirely with UC memory and we use WB. */ - if (guest_cpuid_has_mtrr(vcpu)) + if (guest_cpuid_has(vcpu, X86_FEATURE_MTRR)) return MTRR_TYPE_UNCACHABLE; else return MTRR_TYPE_WRBACK; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index b0454c7e4cff..86b68dc5a649 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -790,8 +790,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, &map_writable)) return 0; - if (handle_abnormal_pfn(vcpu, mmu_is_nested(vcpu) ? 0 : addr, - walker.gfn, pfn, walker.pte_access, &r)) + if (handle_abnormal_pfn(vcpu, addr, walker.gfn, pfn, walker.pte_access, &r)) return r; /* @@ -819,7 +818,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, goto out_unlock; kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); - make_mmu_pages_available(vcpu); + if (make_mmu_pages_available(vcpu) < 0) + goto out_unlock; if (!force_pt_level) transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); r = FNAME(fetch)(vcpu, addr, &walker, write_fault, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8dbd8dbc83eb..2c1cfe68a9af 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -280,9 +280,9 @@ module_param(avic, int, S_IRUGO); static int vls = true; module_param(vls, int, 0444); -/* AVIC VM ID bit masks and lock */ -static DECLARE_BITMAP(avic_vm_id_bitmap, AVIC_VM_ID_NR); -static DEFINE_SPINLOCK(avic_vm_id_lock); +/* enable/disable Virtual GIF */ +static int vgif = true; +module_param(vgif, int, 0444); static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); static void svm_flush_tlb(struct kvm_vcpu *vcpu); @@ -479,19 +479,33 @@ static inline void clr_intercept(struct vcpu_svm *svm, int bit) recalc_intercepts(svm); } +static inline bool vgif_enabled(struct vcpu_svm *svm) +{ + return !!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK); +} + static inline void enable_gif(struct vcpu_svm *svm) { - svm->vcpu.arch.hflags |= HF_GIF_MASK; + if (vgif_enabled(svm)) + svm->vmcb->control.int_ctl |= V_GIF_MASK; + else + svm->vcpu.arch.hflags |= HF_GIF_MASK; } static inline void disable_gif(struct vcpu_svm *svm) { - svm->vcpu.arch.hflags &= ~HF_GIF_MASK; + if (vgif_enabled(svm)) + svm->vmcb->control.int_ctl &= ~V_GIF_MASK; + else + svm->vcpu.arch.hflags &= ~HF_GIF_MASK; } static inline bool gif_set(struct vcpu_svm *svm) { - return !!(svm->vcpu.arch.hflags & HF_GIF_MASK); + if (vgif_enabled(svm)) + return !!(svm->vmcb->control.int_ctl & V_GIF_MASK); + else + return !!(svm->vcpu.arch.hflags & HF_GIF_MASK); } static unsigned long iopm_base; @@ -567,10 +581,10 @@ static inline void invlpga(unsigned long addr, u32 asid) asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); } -static int get_npt_level(void) +static int get_npt_level(struct kvm_vcpu *vcpu) { #ifdef CONFIG_X86_64 - return PT64_ROOT_LEVEL; + return PT64_ROOT_4LEVEL; #else return PT32E_ROOT_LEVEL; #endif @@ -641,7 +655,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); unsigned nr = vcpu->arch.exception.nr; bool has_error_code = vcpu->arch.exception.has_error_code; - bool reinject = vcpu->arch.exception.reinject; + bool reinject = vcpu->arch.exception.injected; u32 error_code = vcpu->arch.exception.error_code; /* @@ -973,6 +987,7 @@ static void svm_disable_lbrv(struct vcpu_svm *svm) static void disable_nmi_singlestep(struct vcpu_svm *svm) { svm->nmi_singlestep = false; + if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) { /* Clear our flags if they were not set by the guest */ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF)) @@ -989,6 +1004,8 @@ static void disable_nmi_singlestep(struct vcpu_svm *svm) */ #define SVM_VM_DATA_HASH_BITS 8 static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS); +static u32 next_vm_id = 0; +static bool next_vm_id_wrapped = 0; static DEFINE_SPINLOCK(svm_vm_data_hash_lock); /* Note: @@ -1108,6 +1125,13 @@ static __init int svm_hardware_setup(void) } } + if (vgif) { + if (!boot_cpu_has(X86_FEATURE_VGIF)) + vgif = false; + else + pr_info("Virtual GIF supported\n"); + } + return 0; err: @@ -1305,6 +1329,12 @@ static void init_vmcb(struct vcpu_svm *svm) svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; } + if (vgif) { + clr_intercept(svm, INTERCEPT_STGI); + clr_intercept(svm, INTERCEPT_CLGI); + svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK; + } + mark_all_dirty(svm->vmcb); enable_gif(svm); @@ -1387,34 +1417,6 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu) return 0; } -static inline int avic_get_next_vm_id(void) -{ - int id; - - spin_lock(&avic_vm_id_lock); - - /* AVIC VM ID is one-based. */ - id = find_next_zero_bit(avic_vm_id_bitmap, AVIC_VM_ID_NR, 1); - if (id <= AVIC_VM_ID_MASK) - __set_bit(id, avic_vm_id_bitmap); - else - id = -EAGAIN; - - spin_unlock(&avic_vm_id_lock); - return id; -} - -static inline int avic_free_vm_id(int id) -{ - if (id <= 0 || id > AVIC_VM_ID_MASK) - return -EINVAL; - - spin_lock(&avic_vm_id_lock); - __clear_bit(id, avic_vm_id_bitmap); - spin_unlock(&avic_vm_id_lock); - return 0; -} - static void avic_vm_destroy(struct kvm *kvm) { unsigned long flags; @@ -1423,8 +1425,6 @@ static void avic_vm_destroy(struct kvm *kvm) if (!avic) return; - avic_free_vm_id(vm_data->avic_vm_id); - if (vm_data->avic_logical_id_table_page) __free_page(vm_data->avic_logical_id_table_page); if (vm_data->avic_physical_id_table_page) @@ -1438,19 +1438,16 @@ static void avic_vm_destroy(struct kvm *kvm) static int avic_vm_init(struct kvm *kvm) { unsigned long flags; - int vm_id, err = -ENOMEM; + int err = -ENOMEM; struct kvm_arch *vm_data = &kvm->arch; struct page *p_page; struct page *l_page; + struct kvm_arch *ka; + u32 vm_id; if (!avic) return 0; - vm_id = avic_get_next_vm_id(); - if (vm_id < 0) - return vm_id; - vm_data->avic_vm_id = (u32)vm_id; - /* Allocating physical APIC ID table (4KB) */ p_page = alloc_page(GFP_KERNEL); if (!p_page) @@ -1468,6 +1465,22 @@ static int avic_vm_init(struct kvm *kvm) clear_page(page_address(l_page)); spin_lock_irqsave(&svm_vm_data_hash_lock, flags); + again: + vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK; + if (vm_id == 0) { /* id is 1-based, zero is not okay */ + next_vm_id_wrapped = 1; + goto again; + } + /* Is it still in use? Only possible if wrapped at least once */ + if (next_vm_id_wrapped) { + hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) { + struct kvm *k2 = container_of(ka, struct kvm, arch); + struct kvm_arch *vd2 = &k2->arch; + if (vd2->avic_vm_id == vm_id) + goto again; + } + } + vm_data->avic_vm_id = vm_id; hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id); spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); @@ -1580,7 +1593,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) } init_vmcb(svm); - kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); + kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true); kvm_register_write(vcpu, VCPU_REGS_RDX, eax); if (kvm_vcpu_apicv_active(vcpu) && !init_event) @@ -2384,7 +2397,7 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr; vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; - vcpu->arch.mmu.shadow_root_level = get_npt_level(); + vcpu->arch.mmu.shadow_root_level = get_npt_level(vcpu); reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu); vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; } @@ -3147,6 +3160,13 @@ static int stgi_interception(struct vcpu_svm *svm) if (nested_svm_check_permissions(svm)) return 1; + /* + * If VGIF is enabled, the STGI intercept is only added to + * detect the opening of the NMI window; remove it now. + */ + if (vgif_enabled(svm)) + clr_intercept(svm, INTERCEPT_STGI); + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; ret = kvm_skip_emulated_instruction(&svm->vcpu); kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); @@ -3744,7 +3764,10 @@ static int interrupt_window_interception(struct vcpu_svm *svm) static int pause_interception(struct vcpu_svm *svm) { - kvm_vcpu_on_spin(&(svm->vcpu)); + struct kvm_vcpu *vcpu = &svm->vcpu; + bool in_kernel = (svm_get_cpl(vcpu) == 0); + + kvm_vcpu_on_spin(vcpu, in_kernel); return 1; } @@ -4228,8 +4251,6 @@ static int handle_exit(struct kvm_vcpu *vcpu) trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); - vcpu->arch.gpa_available = (exit_code == SVM_EXIT_NPF); - if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) vcpu->arch.cr0 = svm->vmcb->save.cr0; if (npt_enabled) @@ -4682,9 +4703,11 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes * 1, because that's a separate STGI/VMRUN intercept. The next time we * get that intercept, this function will be called again though and - * we'll get the vintr intercept. + * we'll get the vintr intercept. However, if the vGIF feature is + * enabled, the STGI interception will not occur. Enable the irq + * window under the assumption that the hardware will set the GIF. */ - if (gif_set(svm) && nested_svm_intr(svm)) { + if ((vgif_enabled(svm) || gif_set(svm)) && nested_svm_intr(svm)) { svm_set_vintr(svm); svm_inject_irq(svm, 0x0); } @@ -4698,8 +4721,11 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) == HF_NMI_MASK) return; /* IRET will cause a vm exit */ - if ((svm->vcpu.arch.hflags & HF_GIF_MASK) == 0) + if (!gif_set(svm)) { + if (vgif_enabled(svm)) + set_intercept(svm, INTERCEPT_STGI); return; /* STGI will cause a vm exit */ + } if (svm->nested.exit_required) return; /* we're not going to run the guest yet */ @@ -5071,17 +5097,14 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) static void svm_cpuid_update(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct kvm_cpuid_entry2 *entry; /* Update nrips enabled cache */ - svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu); + svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS); if (!kvm_vcpu_apicv_active(vcpu)) return; - entry = kvm_find_cpuid_entry(vcpu, 1, 0); - if (entry) - entry->ecx &= ~bit(X86_FEATURE_X2APIC); + guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC); } static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 0a6cc6754ec5..8a202c49e2a0 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -151,8 +151,8 @@ TRACE_EVENT(kvm_fast_mmio, */ TRACE_EVENT(kvm_cpuid, TP_PROTO(unsigned int function, unsigned long rax, unsigned long rbx, - unsigned long rcx, unsigned long rdx), - TP_ARGS(function, rax, rbx, rcx, rdx), + unsigned long rcx, unsigned long rdx, bool found), + TP_ARGS(function, rax, rbx, rcx, rdx, found), TP_STRUCT__entry( __field( unsigned int, function ) @@ -160,6 +160,7 @@ TRACE_EVENT(kvm_cpuid, __field( unsigned long, rbx ) __field( unsigned long, rcx ) __field( unsigned long, rdx ) + __field( bool, found ) ), TP_fast_assign( @@ -168,11 +169,13 @@ TRACE_EVENT(kvm_cpuid, __entry->rbx = rbx; __entry->rcx = rcx; __entry->rdx = rdx; + __entry->found = found; ), - TP_printk("func %x rax %lx rbx %lx rcx %lx rdx %lx", + TP_printk("func %x rax %lx rbx %lx rcx %lx rdx %lx, cpuid entry %s", __entry->function, __entry->rax, - __entry->rbx, __entry->rcx, __entry->rdx) + __entry->rbx, __entry->rcx, __entry->rdx, + __entry->found ? "found" : "not found") ); #define AREG(x) { APIC_##x, "APIC_" #x } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 70b90c0810d0..4253adef9044 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -122,7 +122,7 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) #define KVM_CR4_GUEST_OWNED_BITS \ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ - | X86_CR4_OSXMMEXCPT | X86_CR4_TSD) + | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD) #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) @@ -243,11 +243,13 @@ struct __packed vmcs12 { u64 virtual_apic_page_addr; u64 apic_access_addr; u64 posted_intr_desc_addr; + u64 vm_function_control; u64 ept_pointer; u64 eoi_exit_bitmap0; u64 eoi_exit_bitmap1; u64 eoi_exit_bitmap2; u64 eoi_exit_bitmap3; + u64 eptp_list_address; u64 xss_exit_bitmap; u64 guest_physical_address; u64 vmcs_link_pointer; @@ -481,6 +483,7 @@ struct nested_vmx { u64 nested_vmx_cr4_fixed0; u64 nested_vmx_cr4_fixed1; u64 nested_vmx_vmcs_enum; + u64 nested_vmx_vmfunc_controls; }; #define POSTED_INTR_ON 0 @@ -573,6 +576,8 @@ struct vcpu_vmx { #endif u32 vm_entry_controls_shadow; u32 vm_exit_controls_shadow; + u32 secondary_exec_control; + /* * loaded_vmcs points to the VMCS currently used in this vcpu. For a * non-nested (L1) guest, it always points to vmcs01. For a nested @@ -761,11 +766,13 @@ static const unsigned short vmcs_field_to_offset_table[] = { FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr), FIELD64(APIC_ACCESS_ADDR, apic_access_addr), FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr), + FIELD64(VM_FUNCTION_CONTROL, vm_function_control), FIELD64(EPT_POINTER, ept_pointer), FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0), FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1), FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2), FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3), + FIELD64(EPTP_LIST_ADDRESS, eptp_list_address), FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap), FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address), FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer), @@ -889,25 +896,6 @@ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) return to_vmx(vcpu)->nested.cached_vmcs12; } -static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr) -{ - struct page *page = kvm_vcpu_gfn_to_page(vcpu, addr >> PAGE_SHIFT); - if (is_error_page(page)) - return NULL; - - return page; -} - -static void nested_release_page(struct page *page) -{ - kvm_release_page_dirty(page); -} - -static void nested_release_page_clean(struct page *page) -{ - kvm_release_page_clean(page); -} - static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu); static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa); @@ -1212,6 +1200,16 @@ static inline bool cpu_has_vmx_ept_4levels(void) return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT; } +static inline bool cpu_has_vmx_ept_mt_wb(void) +{ + return vmx_capability.ept & VMX_EPTP_WB_BIT; +} + +static inline bool cpu_has_vmx_ept_5levels(void) +{ + return vmx_capability.ept & VMX_EPT_PAGE_WALK_5_BIT; +} + static inline bool cpu_has_vmx_ept_ad_bits(void) { return vmx_capability.ept & VMX_EPT_AD_BIT; @@ -1317,6 +1315,12 @@ static inline bool cpu_has_vmx_tsc_scaling(void) SECONDARY_EXEC_TSC_SCALING; } +static inline bool cpu_has_vmx_vmfunc(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_ENABLE_VMFUNC; +} + static inline bool report_flexpriority(void) { return flexpriority_enabled; @@ -1357,8 +1361,7 @@ static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12) static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12) { - return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES) && - vmx_xsaves_supported(); + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); } static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12) @@ -1391,6 +1394,18 @@ static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12) return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR; } +static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12) +{ + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC); +} + +static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12) +{ + return nested_cpu_has_vmfunc(vmcs12) && + (vmcs12->vm_function_control & + VMX_VMFUNC_EPTP_SWITCHING); +} + static inline bool is_nmi(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) @@ -2450,15 +2465,14 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu, * KVM wants to inject page-faults which it got to the guest. This function * checks whether in a nested guest, we need to inject them to L1 or L2. */ -static int nested_vmx_check_exception(struct kvm_vcpu *vcpu) +static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); unsigned int nr = vcpu->arch.exception.nr; if (nr == PF_VECTOR) { if (vcpu->arch.exception.nested_apf) { - nested_vmx_inject_exception_vmexit(vcpu, - vcpu->arch.apf.nested_apf_token); + *exit_qual = vcpu->arch.apf.nested_apf_token; return 1; } /* @@ -2472,16 +2486,15 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu) */ if (nested_vmx_is_page_fault_vmexit(vmcs12, vcpu->arch.exception.error_code)) { - nested_vmx_inject_exception_vmexit(vcpu, vcpu->arch.cr2); + *exit_qual = vcpu->arch.cr2; return 1; } } else { - unsigned long exit_qual = 0; - if (nr == DB_VECTOR) - exit_qual = vcpu->arch.dr6; - if (vmcs12->exception_bitmap & (1u << nr)) { - nested_vmx_inject_exception_vmexit(vcpu, exit_qual); + if (nr == DB_VECTOR) + *exit_qual = vcpu->arch.dr6; + else + *exit_qual = 0; return 1; } } @@ -2494,14 +2507,9 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned nr = vcpu->arch.exception.nr; bool has_error_code = vcpu->arch.exception.has_error_code; - bool reinject = vcpu->arch.exception.reinject; u32 error_code = vcpu->arch.exception.error_code; u32 intr_info = nr | INTR_INFO_VALID_MASK; - if (!reinject && is_guest_mode(vcpu) && - nested_vmx_check_exception(vcpu)) - return; - if (has_error_code) { vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); intr_info |= INTR_INFO_DELIVER_CODE_MASK; @@ -2600,7 +2608,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) if (index >= 0) move_msr_up(vmx, index, save_nmsrs++); index = __find_msr_index(vmx, MSR_TSC_AUX); - if (index >= 0 && guest_cpuid_has_rdtscp(&vmx->vcpu)) + if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)) move_msr_up(vmx, index, save_nmsrs++); /* * MSR_STAR is only needed on long mode guests, and only @@ -2660,12 +2668,6 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) } } -static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0); - return best && (best->ecx & (1 << (X86_FEATURE_VMX & 31))); -} - /* * nested_vmx_allowed() checks whether a guest should be allowed to use VMX * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for @@ -2674,7 +2676,7 @@ static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu) */ static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu) { - return nested && guest_cpuid_has_vmx(vcpu); + return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX); } /* @@ -2797,21 +2799,21 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) vmx->nested.nested_vmx_procbased_ctls_low &= ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING); - /* secondary cpu-based controls */ + /* + * secondary cpu-based controls. Do not include those that + * depend on CPUID bits, they are added later by vmx_cpuid_update. + */ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, vmx->nested.nested_vmx_secondary_ctls_low, vmx->nested.nested_vmx_secondary_ctls_high); vmx->nested.nested_vmx_secondary_ctls_low = 0; vmx->nested.nested_vmx_secondary_ctls_high &= - SECONDARY_EXEC_RDRAND | SECONDARY_EXEC_RDSEED | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | - SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_DESC | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | - SECONDARY_EXEC_WBINVD_EXITING | - SECONDARY_EXEC_XSAVES; + SECONDARY_EXEC_WBINVD_EXITING; if (enable_ept) { /* nested EPT: emulate EPT also to L1 */ @@ -2834,6 +2836,17 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) } else vmx->nested.nested_vmx_ept_caps = 0; + if (cpu_has_vmx_vmfunc()) { + vmx->nested.nested_vmx_secondary_ctls_high |= + SECONDARY_EXEC_ENABLE_VMFUNC; + /* + * Advertise EPTP switching unconditionally + * since we emulate it + */ + vmx->nested.nested_vmx_vmfunc_controls = + VMX_VMFUNC_EPTP_SWITCHING; + } + /* * Old versions of KVM use the single-context version without * checking for support, so declare that it is supported even @@ -3203,6 +3216,9 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) *pdata = vmx->nested.nested_vmx_ept_caps | ((u64)vmx->nested.nested_vmx_vpid_caps << 32); break; + case MSR_IA32_VMX_VMFUNC: + *pdata = vmx->nested.nested_vmx_vmfunc_controls; + break; default: return 1; } @@ -3256,7 +3272,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_BNDCFGS: if (!kvm_mpx_supported() || - (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu))) + (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) return 1; msr_info->data = vmcs_read64(GUEST_BNDCFGS); break; @@ -3280,7 +3297,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.ia32_xss; break; case MSR_TSC_AUX: - if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated) + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) return 1; /* Otherwise falls through */ default: @@ -3339,9 +3357,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_BNDCFGS: if (!kvm_mpx_supported() || - (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu))) + (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) return 1; - if (is_noncanonical_address(data & PAGE_MASK) || + if (is_noncanonical_address(data & PAGE_MASK, vcpu) || (data & MSR_IA32_BNDCFGS_RSVD)) return 1; vmcs_write64(GUEST_BNDCFGS, data); @@ -3402,7 +3421,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) clear_atomic_switch_msr(vmx, MSR_IA32_XSS); break; case MSR_TSC_AUX: - if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated) + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) return 1; /* Check reserved bit, higher 32 bits should be zero */ if ((data >> 32) != 0) @@ -3639,8 +3659,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_SHADOW_VMCS | SECONDARY_EXEC_XSAVES | + SECONDARY_EXEC_RDSEED | + SECONDARY_EXEC_RDRAND | SECONDARY_EXEC_ENABLE_PML | - SECONDARY_EXEC_TSC_SCALING; + SECONDARY_EXEC_TSC_SCALING | + SECONDARY_EXEC_ENABLE_VMFUNC; if (adjust_vmx_controls(min2, opt2, MSR_IA32_VMX_PROCBASED_CTLS2, &_cpu_based_2nd_exec_control) < 0) @@ -4272,16 +4295,22 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) vmx->emulation_required = emulation_required(vcpu); } +static int get_ept_level(struct kvm_vcpu *vcpu) +{ + if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48)) + return 5; + return 4; +} + static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa) { - u64 eptp; + u64 eptp = VMX_EPTP_MT_WB; + + eptp |= (get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4; - /* TODO write the value reading from MSR */ - eptp = VMX_EPT_DEFAULT_MT | - VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT; if (enable_ept_ad_bits && (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu))) - eptp |= VMX_EPT_AD_ENABLE_BIT; + eptp |= VMX_EPTP_AD_ENABLE_BIT; eptp |= (root_hpa & PAGE_MASK); return eptp; @@ -5243,10 +5272,24 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) return exec_control; } -static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) +static bool vmx_rdrand_supported(void) { + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_RDRAND; +} + +static bool vmx_rdseed_supported(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_RDSEED; +} + +static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) +{ + struct kvm_vcpu *vcpu = &vmx->vcpu; + u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; - if (!cpu_need_virtualize_apic_accesses(&vmx->vcpu)) + if (!cpu_need_virtualize_apic_accesses(vcpu)) exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; if (vmx->vpid == 0) exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; @@ -5260,7 +5303,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; if (!ple_gap) exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; - if (!kvm_vcpu_apicv_active(&vmx->vcpu)) + if (!kvm_vcpu_apicv_active(vcpu)) exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; @@ -5274,7 +5317,92 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) if (!enable_pml) exec_control &= ~SECONDARY_EXEC_ENABLE_PML; - return exec_control; + if (vmx_xsaves_supported()) { + /* Exposing XSAVES only when XSAVE is exposed */ + bool xsaves_enabled = + guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && + guest_cpuid_has(vcpu, X86_FEATURE_XSAVES); + + if (!xsaves_enabled) + exec_control &= ~SECONDARY_EXEC_XSAVES; + + if (nested) { + if (xsaves_enabled) + vmx->nested.nested_vmx_secondary_ctls_high |= + SECONDARY_EXEC_XSAVES; + else + vmx->nested.nested_vmx_secondary_ctls_high &= + ~SECONDARY_EXEC_XSAVES; + } + } + + if (vmx_rdtscp_supported()) { + bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP); + if (!rdtscp_enabled) + exec_control &= ~SECONDARY_EXEC_RDTSCP; + + if (nested) { + if (rdtscp_enabled) + vmx->nested.nested_vmx_secondary_ctls_high |= + SECONDARY_EXEC_RDTSCP; + else + vmx->nested.nested_vmx_secondary_ctls_high &= + ~SECONDARY_EXEC_RDTSCP; + } + } + + if (vmx_invpcid_supported()) { + /* Exposing INVPCID only when PCID is exposed */ + bool invpcid_enabled = + guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) && + guest_cpuid_has(vcpu, X86_FEATURE_PCID); + + if (!invpcid_enabled) { + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID; + guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID); + } + + if (nested) { + if (invpcid_enabled) + vmx->nested.nested_vmx_secondary_ctls_high |= + SECONDARY_EXEC_ENABLE_INVPCID; + else + vmx->nested.nested_vmx_secondary_ctls_high &= + ~SECONDARY_EXEC_ENABLE_INVPCID; + } + } + + if (vmx_rdrand_supported()) { + bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND); + if (rdrand_enabled) + exec_control &= ~SECONDARY_EXEC_RDRAND; + + if (nested) { + if (rdrand_enabled) + vmx->nested.nested_vmx_secondary_ctls_high |= + SECONDARY_EXEC_RDRAND; + else + vmx->nested.nested_vmx_secondary_ctls_high &= + ~SECONDARY_EXEC_RDRAND; + } + } + + if (vmx_rdseed_supported()) { + bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED); + if (rdseed_enabled) + exec_control &= ~SECONDARY_EXEC_RDSEED; + + if (nested) { + if (rdseed_enabled) + vmx->nested.nested_vmx_secondary_ctls_high |= + SECONDARY_EXEC_RDSEED; + else + vmx->nested.nested_vmx_secondary_ctls_high &= + ~SECONDARY_EXEC_RDSEED; + } + } + + vmx->secondary_exec_control = exec_control; } static void ept_set_mmio_spte_mask(void) @@ -5318,8 +5446,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); if (cpu_has_secondary_exec_ctrls()) { + vmx_compute_secondary_exec_control(vmx); vmcs_write32(SECONDARY_VM_EXEC_CONTROL, - vmx_secondary_exec_control(vmx)); + vmx->secondary_exec_control); } if (kvm_vcpu_apicv_active(&vmx->vcpu)) { @@ -5357,6 +5486,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */ #endif + if (cpu_has_vmx_vmfunc()) + vmcs_write64(VM_FUNCTION_CONTROL, 0); + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host)); @@ -5835,6 +5967,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu) static int handle_triple_fault(struct kvm_vcpu *vcpu) { vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; + vcpu->mmio_needed = 0; return 0; } @@ -6330,7 +6463,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) { unsigned long exit_qualification; gpa_t gpa; - u32 error_code; + u64 error_code; exit_qualification = vmcs_readl(EXIT_QUALIFICATION); @@ -6362,9 +6495,10 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) EPT_VIOLATION_EXECUTABLE)) ? PFERR_PRESENT_MASK : 0; - vcpu->arch.gpa_available = true; - vcpu->arch.exit_qualification = exit_qualification; + error_code |= (exit_qualification & 0x100) != 0 ? + PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; + vcpu->arch.exit_qualification = exit_qualification; return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); } @@ -6373,23 +6507,20 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) int ret; gpa_t gpa; + /* + * A nested guest cannot optimize MMIO vmexits, because we have an + * nGPA here instead of the required GPA. + */ gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); - if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { + if (!is_guest_mode(vcpu) && + !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { trace_kvm_fast_mmio(gpa); return kvm_skip_emulated_instruction(vcpu); } - ret = handle_mmio_page_fault(vcpu, gpa, true); - vcpu->arch.gpa_available = true; - if (likely(ret == RET_MMIO_PF_EMULATE)) - return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) == - EMULATE_DONE; - - if (unlikely(ret == RET_MMIO_PF_INVALID)) - return kvm_mmu_page_fault(vcpu, gpa, 0, NULL, 0); - - if (unlikely(ret == RET_MMIO_PF_RETRY)) - return 1; + ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); + if (ret >= 0) + return ret; /* It is the real ept misconfig */ WARN_ON(1); @@ -6611,7 +6742,8 @@ static __init int hardware_setup(void) init_vmcs_shadow_fields(); if (!cpu_has_vmx_ept() || - !cpu_has_vmx_ept_4levels()) { + !cpu_has_vmx_ept_4levels() || + !cpu_has_vmx_ept_mt_wb()) { enable_ept = 0; enable_unrestricted_guest = 0; enable_ept_ad_bits = 0; @@ -6754,7 +6886,13 @@ static int handle_pause(struct kvm_vcpu *vcpu) if (ple_gap) grow_ple_window(vcpu); - kvm_vcpu_on_spin(vcpu); + /* + * Intel sdm vol3 ch-25.1.3 says: The "PAUSE-loop exiting" + * VM-execution control is ignored if CPL > 0. OTOH, KVM + * never set PAUSE_EXITING and just set PLE if supported, + * so the vcpu must be CPL=0 if it gets a PAUSE exit. + */ + kvm_vcpu_on_spin(vcpu, true); return kvm_skip_emulated_instruction(vcpu); } @@ -6769,6 +6907,12 @@ static int handle_mwait(struct kvm_vcpu *vcpu) return handle_nop(vcpu); } +static int handle_invalid_op(struct kvm_vcpu *vcpu) +{ + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; +} + static int handle_monitor_trap(struct kvm_vcpu *vcpu) { return 1; @@ -6985,7 +7129,7 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, * non-canonical form. This is the only check on the memory * destination for long mode! */ - exn = is_noncanonical_address(*ret); + exn = is_noncanonical_address(*ret, vcpu); } else if (is_protmode(vcpu)) { /* Protected mode: apply checks for segment validity in the * following order: @@ -7149,19 +7293,19 @@ static int handle_vmon(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } - page = nested_get_page(vcpu, vmptr); - if (page == NULL) { + page = kvm_vcpu_gpa_to_page(vcpu, vmptr); + if (is_error_page(page)) { nested_vmx_failInvalid(vcpu); return kvm_skip_emulated_instruction(vcpu); } if (*(u32 *)kmap(page) != VMCS12_REVISION) { kunmap(page); - nested_release_page_clean(page); + kvm_release_page_clean(page); nested_vmx_failInvalid(vcpu); return kvm_skip_emulated_instruction(vcpu); } kunmap(page); - nested_release_page_clean(page); + kvm_release_page_clean(page); vmx->nested.vmxon_ptr = vmptr; ret = enter_vmx_operation(vcpu); @@ -7242,16 +7386,16 @@ static void free_nested(struct vcpu_vmx *vmx) kfree(vmx->nested.cached_vmcs12); /* Unpin physical memory we referred to in current vmcs02 */ if (vmx->nested.apic_access_page) { - nested_release_page(vmx->nested.apic_access_page); + kvm_release_page_dirty(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; } if (vmx->nested.virtual_apic_page) { - nested_release_page(vmx->nested.virtual_apic_page); + kvm_release_page_dirty(vmx->nested.virtual_apic_page); vmx->nested.virtual_apic_page = NULL; } if (vmx->nested.pi_desc_page) { kunmap(vmx->nested.pi_desc_page); - nested_release_page(vmx->nested.pi_desc_page); + kvm_release_page_dirty(vmx->nested.pi_desc_page); vmx->nested.pi_desc_page = NULL; vmx->nested.pi_desc = NULL; } @@ -7618,15 +7762,15 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) if (vmx->nested.current_vmptr != vmptr) { struct vmcs12 *new_vmcs12; struct page *page; - page = nested_get_page(vcpu, vmptr); - if (page == NULL) { + page = kvm_vcpu_gpa_to_page(vcpu, vmptr); + if (is_error_page(page)) { nested_vmx_failInvalid(vcpu); return kvm_skip_emulated_instruction(vcpu); } new_vmcs12 = kmap(page); if (new_vmcs12->revision_id != VMCS12_REVISION) { kunmap(page); - nested_release_page_clean(page); + kvm_release_page_clean(page); nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); return kvm_skip_emulated_instruction(vcpu); @@ -7639,7 +7783,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) */ memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE); kunmap(page); - nested_release_page_clean(page); + kvm_release_page_clean(page); set_current_vmptr(vmx, vmptr); } @@ -7790,7 +7934,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) switch (type) { case VMX_VPID_EXTENT_INDIVIDUAL_ADDR: - if (is_noncanonical_address(operand.gla)) { + if (is_noncanonical_address(operand.gla, vcpu)) { nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); return kvm_skip_emulated_instruction(vcpu); @@ -7847,6 +7991,124 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu) return 1; } +static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + int maxphyaddr = cpuid_maxphyaddr(vcpu); + + /* Check for memory type validity */ + switch (address & VMX_EPTP_MT_MASK) { + case VMX_EPTP_MT_UC: + if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_UC_BIT)) + return false; + break; + case VMX_EPTP_MT_WB: + if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_WB_BIT)) + return false; + break; + default: + return false; + } + + /* only 4 levels page-walk length are valid */ + if ((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4) + return false; + + /* Reserved bits should not be set */ + if (address >> maxphyaddr || ((address >> 7) & 0x1f)) + return false; + + /* AD, if set, should be supported */ + if (address & VMX_EPTP_AD_ENABLE_BIT) { + if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPT_AD_BIT)) + return false; + } + + return true; +} + +static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + u32 index = vcpu->arch.regs[VCPU_REGS_RCX]; + u64 address; + bool accessed_dirty; + struct kvm_mmu *mmu = vcpu->arch.walk_mmu; + + if (!nested_cpu_has_eptp_switching(vmcs12) || + !nested_cpu_has_ept(vmcs12)) + return 1; + + if (index >= VMFUNC_EPTP_ENTRIES) + return 1; + + + if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT, + &address, index * 8, 8)) + return 1; + + accessed_dirty = !!(address & VMX_EPTP_AD_ENABLE_BIT); + + /* + * If the (L2) guest does a vmfunc to the currently + * active ept pointer, we don't have to do anything else + */ + if (vmcs12->ept_pointer != address) { + if (!valid_ept_address(vcpu, address)) + return 1; + + kvm_mmu_unload(vcpu); + mmu->ept_ad = accessed_dirty; + mmu->base_role.ad_disabled = !accessed_dirty; + vmcs12->ept_pointer = address; + /* + * TODO: Check what's the correct approach in case + * mmu reload fails. Currently, we just let the next + * reload potentially fail + */ + kvm_mmu_reload(vcpu); + } + + return 0; +} + +static int handle_vmfunc(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vmcs12 *vmcs12; + u32 function = vcpu->arch.regs[VCPU_REGS_RAX]; + + /* + * VMFUNC is only supported for nested guests, but we always enable the + * secondary control for simplicity; for non-nested mode, fake that we + * didn't by injecting #UD. + */ + if (!is_guest_mode(vcpu)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + + vmcs12 = get_vmcs12(vcpu); + if ((vmcs12->vm_function_control & (1 << function)) == 0) + goto fail; + + switch (function) { + case 0: + if (nested_vmx_eptp_switching(vcpu, vmcs12)) + goto fail; + break; + default: + goto fail; + } + return kvm_skip_emulated_instruction(vcpu); + +fail: + nested_vmx_vmexit(vcpu, vmx->exit_reason, + vmcs_read32(VM_EXIT_INTR_INFO), + vmcs_readl(EXIT_QUALIFICATION)); + return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -7894,9 +8156,12 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor, [EXIT_REASON_INVEPT] = handle_invept, [EXIT_REASON_INVVPID] = handle_invvpid, + [EXIT_REASON_RDRAND] = handle_invalid_op, + [EXIT_REASON_RDSEED] = handle_invalid_op, [EXIT_REASON_XSAVES] = handle_xsaves, [EXIT_REASON_XRSTORS] = handle_xrstors, [EXIT_REASON_PML_FULL] = handle_pml_full, + [EXIT_REASON_VMFUNC] = handle_vmfunc, [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, }; @@ -8212,6 +8477,10 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) * table is L0's fault. */ return false; + case EXIT_REASON_INVPCID: + return + nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) && + nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); case EXIT_REASON_WBINVD: return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); case EXIT_REASON_XSETBV: @@ -8229,6 +8498,9 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) case EXIT_REASON_PML_FULL: /* We emulate PML support to L1. */ return false; + case EXIT_REASON_VMFUNC: + /* VM functions are emulated through L2->L0 vmexits. */ + return false; default: return true; } @@ -8487,7 +8759,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) u32 vectoring_info = vmx->idt_vectoring_info; trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); - vcpu->arch.gpa_available = false; /* * Flush logged GPAs PML buffer, this will make dirty_bitmap more @@ -9341,11 +9612,6 @@ static void __init vmx_check_processor_compat(void *rtn) } } -static int get_ept_level(void) -{ - return VMX_EPT_DEFAULT_GAW + 1; -} - static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) { u8 cache; @@ -9462,39 +9728,13 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) static void vmx_cpuid_update(struct kvm_vcpu *vcpu) { - struct kvm_cpuid_entry2 *best; struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 secondary_exec_ctl = vmx_secondary_exec_control(vmx); - if (vmx_rdtscp_supported()) { - bool rdtscp_enabled = guest_cpuid_has_rdtscp(vcpu); - if (!rdtscp_enabled) - secondary_exec_ctl &= ~SECONDARY_EXEC_RDTSCP; - - if (nested) { - if (rdtscp_enabled) - vmx->nested.nested_vmx_secondary_ctls_high |= - SECONDARY_EXEC_RDTSCP; - else - vmx->nested.nested_vmx_secondary_ctls_high &= - ~SECONDARY_EXEC_RDTSCP; - } - } - - /* Exposing INVPCID only when PCID is exposed */ - best = kvm_find_cpuid_entry(vcpu, 0x7, 0); - if (vmx_invpcid_supported() && - (!best || !(best->ebx & bit(X86_FEATURE_INVPCID)) || - !guest_cpuid_has_pcid(vcpu))) { - secondary_exec_ctl &= ~SECONDARY_EXEC_ENABLE_INVPCID; - - if (best) - best->ebx &= ~bit(X86_FEATURE_INVPCID); + if (cpu_has_secondary_exec_ctrls()) { + vmx_compute_secondary_exec_control(vmx); + vmcs_set_secondary_exec_control(vmx->secondary_exec_control); } - if (cpu_has_secondary_exec_ctrls()) - vmcs_set_secondary_exec_control(secondary_exec_ctl); - if (nested_vmx_allowed(vcpu)) to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; @@ -9535,7 +9775,7 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu) { - return nested_ept_get_cr3(vcpu) & VMX_EPT_AD_ENABLE_BIT; + return nested_ept_get_cr3(vcpu) & VMX_EPTP_AD_ENABLE_BIT; } /* Callbacks for nested_ept_init_mmu_context: */ @@ -9548,18 +9788,15 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) { - bool wants_ad; - WARN_ON(mmu_is_nested(vcpu)); - wants_ad = nested_ept_ad_enabled(vcpu); - if (wants_ad && !enable_ept_ad_bits) + if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu))) return 1; kvm_mmu_unload(vcpu); kvm_init_shadow_ept_mmu(vcpu, to_vmx(vcpu)->nested.nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT, - wants_ad); + nested_ept_ad_enabled(vcpu)); vcpu->arch.mmu.set_cr3 = vmx_set_cr3; vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3; vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; @@ -9610,6 +9847,7 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { struct vcpu_vmx *vmx = to_vmx(vcpu); + struct page *page; u64 hpa; if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { @@ -9619,17 +9857,19 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, * physical address remains valid. We keep a reference * to it so we can release it later. */ - if (vmx->nested.apic_access_page) /* shouldn't happen */ - nested_release_page(vmx->nested.apic_access_page); - vmx->nested.apic_access_page = - nested_get_page(vcpu, vmcs12->apic_access_addr); + if (vmx->nested.apic_access_page) { /* shouldn't happen */ + kvm_release_page_dirty(vmx->nested.apic_access_page); + vmx->nested.apic_access_page = NULL; + } + page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr); /* * If translation failed, no matter: This feature asks * to exit when accessing the given address, and if it * can never be accessed, this feature won't do * anything anyway. */ - if (vmx->nested.apic_access_page) { + if (!is_error_page(page)) { + vmx->nested.apic_access_page = page; hpa = page_to_phys(vmx->nested.apic_access_page); vmcs_write64(APIC_ACCESS_ADDR, hpa); } else { @@ -9644,10 +9884,11 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, } if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { - if (vmx->nested.virtual_apic_page) /* shouldn't happen */ - nested_release_page(vmx->nested.virtual_apic_page); - vmx->nested.virtual_apic_page = - nested_get_page(vcpu, vmcs12->virtual_apic_page_addr); + if (vmx->nested.virtual_apic_page) { /* shouldn't happen */ + kvm_release_page_dirty(vmx->nested.virtual_apic_page); + vmx->nested.virtual_apic_page = NULL; + } + page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->virtual_apic_page_addr); /* * If translation failed, VM entry will fail because @@ -9662,7 +9903,8 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, * control. But such a configuration is useless, so * let's keep the code simple. */ - if (vmx->nested.virtual_apic_page) { + if (!is_error_page(page)) { + vmx->nested.virtual_apic_page = page; hpa = page_to_phys(vmx->nested.virtual_apic_page); vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa); } @@ -9671,16 +9913,14 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, if (nested_cpu_has_posted_intr(vmcs12)) { if (vmx->nested.pi_desc_page) { /* shouldn't happen */ kunmap(vmx->nested.pi_desc_page); - nested_release_page(vmx->nested.pi_desc_page); + kvm_release_page_dirty(vmx->nested.pi_desc_page); + vmx->nested.pi_desc_page = NULL; } - vmx->nested.pi_desc_page = - nested_get_page(vcpu, vmcs12->posted_intr_desc_addr); - vmx->nested.pi_desc = - (struct pi_desc *)kmap(vmx->nested.pi_desc_page); - if (!vmx->nested.pi_desc) { - nested_release_page_clean(vmx->nested.pi_desc_page); + page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr); + if (is_error_page(page)) return; - } + vmx->nested.pi_desc_page = page; + vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page); vmx->nested.pi_desc = (struct pi_desc *)((void *)vmx->nested.pi_desc + (unsigned long)(vmcs12->posted_intr_desc_addr & @@ -9746,6 +9986,18 @@ static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu, return 0; } +static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) + return 0; + + if (!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr)) + return -EINVAL; + + return 0; +} + /* * Merge L0's and L1's MSR bitmap, return false to indicate that * we do not use the hardware. @@ -9762,8 +10014,8 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) return false; - page = nested_get_page(vcpu, vmcs12->msr_bitmap); - if (!page) + page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap); + if (is_error_page(page)) return false; msr_bitmap_l1 = (unsigned long *)kmap(page); @@ -9793,7 +10045,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, } } kunmap(page); - nested_release_page_clean(page); + kvm_release_page_clean(page); return true; } @@ -10187,13 +10439,16 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, enable_ept ? vmcs12->page_fault_error_code_match : 0); if (cpu_has_secondary_exec_ctrls()) { - exec_control = vmx_secondary_exec_control(vmx); + exec_control = vmx->secondary_exec_control; /* Take the following fields only from vmcs12 */ exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_ENABLE_INVPCID | SECONDARY_EXEC_RDTSCP | + SECONDARY_EXEC_XSAVES | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | - SECONDARY_EXEC_APIC_REGISTER_VIRT); + SECONDARY_EXEC_APIC_REGISTER_VIRT | + SECONDARY_EXEC_ENABLE_VMFUNC); if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) { vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control & @@ -10201,6 +10456,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, exec_control |= vmcs12_exec_ctrl; } + /* All VMFUNCs are currently emulated through L0 vmexits. */ + if (exec_control & SECONDARY_EXEC_ENABLE_VMFUNC) + vmcs_write64(VM_FUNCTION_CONTROL, 0); + if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) { vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0); @@ -10426,6 +10685,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + if (nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12)) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + if (nested_vmx_check_apicv_controls(vcpu, vmcs12)) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; @@ -10453,6 +10715,18 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmx->nested.nested_vmx_entry_ctls_high)) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + if (nested_cpu_has_vmfunc(vmcs12)) { + if (vmcs12->vm_function_control & + ~vmx->nested.nested_vmx_vmfunc_controls) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + + if (nested_cpu_has_eptp_switching(vmcs12)) { + if (!nested_cpu_has_ept(vmcs12) || + !page_address_valid(vcpu, vmcs12->eptp_list_address)) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + } + } + if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; @@ -10699,7 +10973,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, u32 idt_vectoring; unsigned int nr; - if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) { + if (vcpu->arch.exception.injected) { nr = vcpu->arch.exception.nr; idt_vectoring = nr | VECTORING_INFO_VALID_MASK; @@ -10738,12 +11012,20 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) { struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long exit_qual; - if (vcpu->arch.exception.pending || - vcpu->arch.nmi_injected || - vcpu->arch.interrupt.pending) + if (kvm_event_needs_reinjection(vcpu)) return -EBUSY; + if (vcpu->arch.exception.pending && + nested_vmx_check_exception(vcpu, &exit_qual)) { + if (vmx->nested.nested_run_pending) + return -EBUSY; + nested_vmx_inject_exception_vmexit(vcpu, exit_qual); + vcpu->arch.exception.pending = false; + return 0; + } + if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && vmx->nested.preemption_timer_expired) { if (vmx->nested.nested_run_pending) @@ -11184,16 +11466,16 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, /* Unpin physical memory we referred to in vmcs02 */ if (vmx->nested.apic_access_page) { - nested_release_page(vmx->nested.apic_access_page); + kvm_release_page_dirty(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; } if (vmx->nested.virtual_apic_page) { - nested_release_page(vmx->nested.virtual_apic_page); + kvm_release_page_dirty(vmx->nested.virtual_apic_page); vmx->nested.virtual_apic_page = NULL; } if (vmx->nested.pi_desc_page) { kunmap(vmx->nested.pi_desc_page); - nested_release_page(vmx->nested.pi_desc_page); + kvm_release_page_dirty(vmx->nested.pi_desc_page); vmx->nested.pi_desc_page = NULL; vmx->nested.pi_desc = NULL; } @@ -11369,14 +11651,14 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull; - page = nested_get_page(vcpu, vmcs12->pml_address); - if (!page) + page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->pml_address); + if (is_error_page(page)) return 0; pml_address = kmap(page); pml_address[vmcs12->guest_pml_index--] = gpa; kunmap(page); - nested_release_page_clean(page); + kvm_release_page_clean(page); } return 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ef5102f80497..6069af86da3b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -311,13 +311,13 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info) (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); u64 new_state = msr_info->data & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); - u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | - 0x2ff | (guest_cpuid_has_x2apic(vcpu) ? 0 : X2APIC_ENABLE); + u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff | + (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE); + if ((msr_info->data & reserved_bits) || new_state == X2APIC_ENABLE) + return 1; if (!msr_info->host_initiated && - ((msr_info->data & reserved_bits) != 0 || - new_state == X2APIC_ENABLE || - (new_state == MSR_IA32_APICBASE_ENABLE && + ((new_state == MSR_IA32_APICBASE_ENABLE && old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) || (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) && old_state == 0))) @@ -390,15 +390,28 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, kvm_make_request(KVM_REQ_EVENT, vcpu); - if (!vcpu->arch.exception.pending) { + if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) { queue: if (has_error && !is_protmode(vcpu)) has_error = false; - vcpu->arch.exception.pending = true; + if (reinject) { + /* + * On vmentry, vcpu->arch.exception.pending is only + * true if an event injection was blocked by + * nested_run_pending. In that case, however, + * vcpu_enter_guest requests an immediate exit, + * and the guest shouldn't proceed far enough to + * need reinjection. + */ + WARN_ON_ONCE(vcpu->arch.exception.pending); + vcpu->arch.exception.injected = true; + } else { + vcpu->arch.exception.pending = true; + vcpu->arch.exception.injected = false; + } vcpu->arch.exception.has_error_code = has_error; vcpu->arch.exception.nr = nr; vcpu->arch.exception.error_code = error_code; - vcpu->arch.exception.reinject = reinject; return; } @@ -413,8 +426,13 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, class2 = exception_class(nr); if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY) || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) { - /* generate double fault per SDM Table 5-5 */ + /* + * Generate double fault per SDM Table 5-5. Set + * exception.pending = true so that the double fault + * can trigger a nested vmexit. + */ vcpu->arch.exception.pending = true; + vcpu->arch.exception.injected = false; vcpu->arch.exception.has_error_code = true; vcpu->arch.exception.nr = DF_VECTOR; vcpu->arch.exception.error_code = 0; @@ -755,19 +773,22 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) if (cr4 & CR4_RESERVED_BITS) return 1; - if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE)) + return 1; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP)) return 1; - if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP)) return 1; - if (!guest_cpuid_has_smap(vcpu) && (cr4 & X86_CR4_SMAP)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE)) return 1; - if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE)) return 1; - if (!guest_cpuid_has_pku(vcpu) && (cr4 & X86_CR4_PKE)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57)) return 1; if (is_long_mode(vcpu)) { @@ -780,7 +801,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 1; if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) { - if (!guest_cpuid_has_pcid(vcpu)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID)) return 1; /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */ @@ -814,10 +835,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) return 0; } - if (is_long_mode(vcpu)) { - if (cr3 & CR3_L_MODE_RESERVED_BITS) - return 1; - } else if (is_pae(vcpu) && is_paging(vcpu) && + if (is_long_mode(vcpu) && + (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 62))) + return 1; + else if (is_pae(vcpu) && is_paging(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) return 1; @@ -884,7 +905,7 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) { u64 fixed = DR6_FIXED_1; - if (!guest_cpuid_has_rtm(vcpu)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM)) fixed |= DR6_RTM; return fixed; } @@ -994,6 +1015,7 @@ static u32 emulated_msrs[] = { MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, + HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY, HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2, HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL, HV_X64_MSR_RESET, @@ -1022,21 +1044,11 @@ bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) if (efer & efer_reserved_bits) return false; - if (efer & EFER_FFXSR) { - struct kvm_cpuid_entry2 *feat; - - feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); - if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) + if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT)) return false; - } - if (efer & EFER_SVME) { - struct kvm_cpuid_entry2 *feat; - - feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); - if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) + if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM)) return false; - } return true; } @@ -1084,7 +1096,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_KERNEL_GS_BASE: case MSR_CSTAR: case MSR_LSTAR: - if (is_noncanonical_address(msr->data)) + if (is_noncanonical_address(msr->data, vcpu)) return 1; break; case MSR_IA32_SYSENTER_EIP: @@ -1101,7 +1113,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) * value, and that something deterministic happens if the guest * invokes 64-bit SYSENTER. */ - msr->data = get_canonical(msr->data); + msr->data = get_canonical(msr->data, vcpu_virt_addr_bits(vcpu)); } return kvm_x86_ops->set_msr(vcpu, msr); } @@ -1534,8 +1546,9 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec; vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write; - if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated) + if (!msr->host_initiated && guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) update_ia32_tsc_adjust_msr(vcpu, offset); + kvm_vcpu_write_tsc_offset(vcpu, offset); raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); @@ -2185,7 +2198,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) kvm_set_lapic_tscdeadline_msr(vcpu, data); break; case MSR_IA32_TSC_ADJUST: - if (guest_cpuid_has_tsc_adjust(vcpu)) { + if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) { if (!msr_info->host_initiated) { s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; adjust_tsc_offset_guest(vcpu, adj); @@ -2307,12 +2320,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data); break; case MSR_AMD64_OSVW_ID_LENGTH: - if (!guest_cpuid_has_osvw(vcpu)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) return 1; vcpu->arch.osvw.length = data; break; case MSR_AMD64_OSVW_STATUS: - if (!guest_cpuid_has_osvw(vcpu)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) return 1; vcpu->arch.osvw.status = data; break; @@ -2537,12 +2550,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0xbe702111; break; case MSR_AMD64_OSVW_ID_LENGTH: - if (!guest_cpuid_has_osvw(vcpu)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) return 1; msr_info->data = vcpu->arch.osvw.length; break; case MSR_AMD64_OSVW_STATUS: - if (!guest_cpuid_has_osvw(vcpu)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) return 1; msr_info->data = vcpu->arch.osvw.status; break; @@ -2882,6 +2895,10 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { int idx; + + if (vcpu->preempted) + vcpu->arch.preempted_in_kernel = !kvm_x86_ops->get_cpl(vcpu); + /* * Disable page faults because we're in atomic context here. * kvm_write_guest_offset_cached() would call might_fault() @@ -3074,8 +3091,14 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { process_nmi(vcpu); + /* + * FIXME: pass injected and pending separately. This is only + * needed for nested virtualization, whose state cannot be + * migrated yet. For now we can combine them. + */ events->exception.injected = - vcpu->arch.exception.pending && + (vcpu->arch.exception.pending || + vcpu->arch.exception.injected) && !kvm_exception_is_soft(vcpu->arch.exception.nr); events->exception.nr = vcpu->arch.exception.nr; events->exception.has_error_code = vcpu->arch.exception.has_error_code; @@ -3130,6 +3153,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, return -EINVAL; process_nmi(vcpu); + vcpu->arch.exception.injected = false; vcpu->arch.exception.pending = events->exception.injected; vcpu->arch.exception.nr = events->exception.nr; vcpu->arch.exception.has_error_code = events->exception.has_error_code; @@ -4671,25 +4695,18 @@ static int emulator_read_write_onepage(unsigned long addr, void *val, */ if (vcpu->arch.gpa_available && emulator_can_use_gpa(ctxt) && - vcpu_is_mmio_gpa(vcpu, addr, exception->address, write) && - (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) { - gpa = exception->address; - goto mmio; + (addr & ~PAGE_MASK) == (vcpu->arch.gpa_val & ~PAGE_MASK)) { + gpa = vcpu->arch.gpa_val; + ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write); + } else { + ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write); + if (ret < 0) + return X86EMUL_PROPAGATE_FAULT; } - ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write); - - if (ret < 0) - return X86EMUL_PROPAGATE_FAULT; - - /* For APIC access vmexit */ - if (ret) - goto mmio; - - if (ops->read_write_emulate(vcpu, gpa, val, bytes)) + if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes)) return X86EMUL_CONTINUE; -mmio: /* * Is this MMIO handled locally? */ @@ -5227,10 +5244,10 @@ static int emulator_intercept(struct x86_emulate_ctxt *ctxt, return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage); } -static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, - u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) +static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, + u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit) { - kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx); + return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, check_limit); } static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg) @@ -6362,11 +6379,42 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) int r; /* try to reinject previous events if any */ + if (vcpu->arch.exception.injected) { + kvm_x86_ops->queue_exception(vcpu); + return 0; + } + + /* + * Exceptions must be injected immediately, or the exception + * frame will have the address of the NMI or interrupt handler. + */ + if (!vcpu->arch.exception.pending) { + if (vcpu->arch.nmi_injected) { + kvm_x86_ops->set_nmi(vcpu); + return 0; + } + + if (vcpu->arch.interrupt.pending) { + kvm_x86_ops->set_irq(vcpu); + return 0; + } + } + + if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { + r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); + if (r != 0) + return r; + } + + /* try to inject new event if pending */ if (vcpu->arch.exception.pending) { trace_kvm_inj_exception(vcpu->arch.exception.nr, vcpu->arch.exception.has_error_code, vcpu->arch.exception.error_code); + vcpu->arch.exception.pending = false; + vcpu->arch.exception.injected = true; + if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT) __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | X86_EFLAGS_RF); @@ -6378,27 +6426,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) } kvm_x86_ops->queue_exception(vcpu); - return 0; - } - - if (vcpu->arch.nmi_injected) { - kvm_x86_ops->set_nmi(vcpu); - return 0; - } - - if (vcpu->arch.interrupt.pending) { - kvm_x86_ops->set_irq(vcpu); - return 0; - } - - if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { - r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); - if (r != 0) - return r; - } - - /* try to inject new event if pending */ - if (vcpu->arch.smi_pending && !is_smm(vcpu)) { + } else if (vcpu->arch.smi_pending && !is_smm(vcpu)) { vcpu->arch.smi_pending = false; enter_smm(vcpu); } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) { @@ -6615,7 +6643,7 @@ static void enter_smm(struct kvm_vcpu *vcpu) trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true); vcpu->arch.hflags |= HF_SMM_MASK; memset(buf, 0, 512); - if (guest_cpuid_has_longmode(vcpu)) + if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) enter_smm_save_state_64(vcpu, buf); else enter_smm_save_state_32(vcpu, buf); @@ -6667,7 +6695,7 @@ static void enter_smm(struct kvm_vcpu *vcpu) kvm_set_segment(vcpu, &ds, VCPU_SREG_GS); kvm_set_segment(vcpu, &ds, VCPU_SREG_SS); - if (guest_cpuid_has_longmode(vcpu)) + if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) kvm_x86_ops->set_efer(vcpu, 0); kvm_update_cpuid(vcpu); @@ -6774,6 +6802,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; + vcpu->mmio_needed = 0; r = 0; goto out; } @@ -6862,6 +6891,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_x86_ops->enable_nmi_window(vcpu); if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) kvm_x86_ops->enable_irq_window(vcpu); + WARN_ON(vcpu->arch.exception.pending); } if (kvm_lapic_enabled(vcpu)) { @@ -7004,6 +7034,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (vcpu->arch.apic_attention) kvm_lapic_sync_from_vapic(vcpu); + vcpu->arch.gpa_available = false; r = kvm_x86_ops->handle_exit(vcpu); return r; @@ -7422,7 +7453,13 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int pending_vec, max_bits, idx; struct desc_ptr dt; - if (!guest_cpuid_has_xsave(vcpu) && (sregs->cr4 & X86_CR4_OSXSAVE)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && + (sregs->cr4 & X86_CR4_OSXSAVE)) + return -EINVAL; + + apic_base_msr.data = sregs->apic_base; + apic_base_msr.host_initiated = true; + if (kvm_set_apic_base(vcpu, &apic_base_msr)) return -EINVAL; dt.size = sregs->idt.limit; @@ -7441,9 +7478,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, mmu_reset_needed |= vcpu->arch.efer != sregs->efer; kvm_x86_ops->set_efer(vcpu, sregs->efer); - apic_base_msr.data = sregs->apic_base; - apic_base_msr.host_initiated = true; - kvm_set_apic_base(vcpu, &apic_base_msr); mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0; kvm_x86_ops->set_cr0(vcpu, sregs->cr0); @@ -7734,6 +7768,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.nmi_injected = false; kvm_clear_interrupt_queue(vcpu); kvm_clear_exception_queue(vcpu); + vcpu->arch.exception.pending = false; memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); kvm_update_dr0123(vcpu); @@ -7993,6 +8028,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) kvm_pmu_init(vcpu); vcpu->arch.pending_external_vector = -1; + vcpu->arch.preempted_in_kernel = false; kvm_hv_vcpu_init(vcpu); @@ -8440,6 +8476,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); } +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.preempted_in_kernel; +} + int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 612067074905..51e349cf5f45 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -11,7 +11,7 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) { - vcpu->arch.exception.pending = false; + vcpu->arch.exception.injected = false; } static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector, @@ -29,7 +29,7 @@ static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu) static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu) { - return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending || + return vcpu->arch.exception.injected || vcpu->arch.interrupt.pending || vcpu->arch.nmi_injected; } @@ -62,6 +62,16 @@ static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu) return cs_l; } +static inline bool is_la57_mode(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_X86_64 + return (vcpu->arch.efer & EFER_LMA) && + kvm_read_cr4_bits(vcpu, X86_CR4_LA57); +#else + return 0; +#endif +} + static inline bool mmu_is_nested(struct kvm_vcpu *vcpu) { return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu; @@ -87,10 +97,48 @@ static inline u32 bit(int bitno) return 1 << (bitno & 31); } +static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu) +{ + return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48; +} + +static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt) +{ + return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48; +} + +static inline u64 get_canonical(u64 la, u8 vaddr_bits) +{ + return ((int64_t)la << (64 - vaddr_bits)) >> (64 - vaddr_bits); +} + +static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_X86_64 + return get_canonical(la, vcpu_virt_addr_bits(vcpu)) != la; +#else + return false; +#endif +} + +static inline bool emul_is_noncanonical_address(u64 la, + struct x86_emulate_ctxt *ctxt) +{ +#ifdef CONFIG_X86_64 + return get_canonical(la, ctxt_virt_addr_bits(ctxt)) != la; +#else + return false; +#endif +} + static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, unsigned access) { - vcpu->arch.mmio_gva = gva & PAGE_MASK; + /* + * If this is a shadow nested page table, the "GVA" is + * actually a nGPA. + */ + vcpu->arch.mmio_gva = mmu_is_nested(vcpu) ? 0 : gva & PAGE_MASK; vcpu->arch.access = access; vcpu->arch.mmio_gfn = gfn; vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation; diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 11e407489db0..f2228b150faa 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -618,3 +618,20 @@ static void quirk_apple_mbp_poweroff(struct pci_dev *pdev) dev_info(dev, "can't work around MacBook Pro poweroff issue\n"); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x8c10, quirk_apple_mbp_poweroff); + +/* + * VMD-enabled root ports will change the source ID for all messages + * to the VMD device. Rather than doing device matching with the source + * ID, the AER driver should traverse the child device tree, reading + * AER registers to find the faulting device. + */ +static void quirk_no_aersid(struct pci_dev *pdev) +{ + /* VMD Domain */ + if (is_vmd(pdev->bus)) + pdev->bus->bus_flags |= PCI_BUS_FLAGS_NO_AERSID; +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2030, quirk_no_aersid); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2031, quirk_no_aersid); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2032, quirk_no_aersid); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 3e8636f1220e..bb05fc50ee2e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -901,6 +901,13 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf struct pci_dev *pf_pdev; pdev = to_pci_dev(dev); + +#ifdef CONFIG_X86 + /* VMD child devices currently cannot be handled individually */ + if (is_vmd(pdev->bus)) + return NULL; +#endif + /* VFs aren't listed in scope tables; we need to look up * the PF instead to find the IOMMU. */ pf_pdev = pci_physfn(pdev); diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index 09c10f426b64..deb203026496 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -72,6 +72,11 @@ static DEFINE_IDA(pci_endpoint_test_ida); #define to_endpoint_test(priv) container_of((priv), struct pci_endpoint_test, \ miscdev) + +static bool no_msi; +module_param(no_msi, bool, 0444); +MODULE_PARM_DESC(no_msi, "Disable MSI interrupt in pci_endpoint_test"); + enum pci_barno { BAR_0, BAR_1, @@ -90,9 +95,15 @@ struct pci_endpoint_test { /* mutex to protect the ioctls */ struct mutex mutex; struct miscdevice miscdev; + enum pci_barno test_reg_bar; + size_t alignment; }; -static int bar_size[] = { 4, 512, 1024, 16384, 131072, 1048576 }; +struct pci_endpoint_test_data { + enum pci_barno test_reg_bar; + size_t alignment; + bool no_msi; +}; static inline u32 pci_endpoint_test_readl(struct pci_endpoint_test *test, u32 offset) @@ -141,11 +152,15 @@ static bool pci_endpoint_test_bar(struct pci_endpoint_test *test, int j; u32 val; int size; + struct pci_dev *pdev = test->pdev; if (!test->bar[barno]) return false; - size = bar_size[barno]; + size = pci_resource_len(pdev, barno); + + if (barno == test->test_reg_bar) + size = 0x4; for (j = 0; j < size; j += 4) pci_endpoint_test_bar_writel(test, barno, j, 0xA0A0A0A0); @@ -202,16 +217,32 @@ static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t size) dma_addr_t dst_phys_addr; struct pci_dev *pdev = test->pdev; struct device *dev = &pdev->dev; + void *orig_src_addr; + dma_addr_t orig_src_phys_addr; + void *orig_dst_addr; + dma_addr_t orig_dst_phys_addr; + size_t offset; + size_t alignment = test->alignment; u32 src_crc32; u32 dst_crc32; - src_addr = dma_alloc_coherent(dev, size, &src_phys_addr, GFP_KERNEL); - if (!src_addr) { + orig_src_addr = dma_alloc_coherent(dev, size + alignment, + &orig_src_phys_addr, GFP_KERNEL); + if (!orig_src_addr) { dev_err(dev, "failed to allocate source buffer\n"); ret = false; goto err; } + if (alignment && !IS_ALIGNED(orig_src_phys_addr, alignment)) { + src_phys_addr = PTR_ALIGN(orig_src_phys_addr, alignment); + offset = src_phys_addr - orig_src_phys_addr; + src_addr = orig_src_addr + offset; + } else { + src_phys_addr = orig_src_phys_addr; + src_addr = orig_src_addr; + } + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_LOWER_SRC_ADDR, lower_32_bits(src_phys_addr)); @@ -221,11 +252,21 @@ static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t size) get_random_bytes(src_addr, size); src_crc32 = crc32_le(~0, src_addr, size); - dst_addr = dma_alloc_coherent(dev, size, &dst_phys_addr, GFP_KERNEL); - if (!dst_addr) { + orig_dst_addr = dma_alloc_coherent(dev, size + alignment, + &orig_dst_phys_addr, GFP_KERNEL); + if (!orig_dst_addr) { dev_err(dev, "failed to allocate destination address\n"); ret = false; - goto err_src_addr; + goto err_orig_src_addr; + } + + if (alignment && !IS_ALIGNED(orig_dst_phys_addr, alignment)) { + dst_phys_addr = PTR_ALIGN(orig_dst_phys_addr, alignment); + offset = dst_phys_addr - orig_dst_phys_addr; + dst_addr = orig_dst_addr + offset; + } else { + dst_phys_addr = orig_dst_phys_addr; + dst_addr = orig_dst_addr; } pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_LOWER_DST_ADDR, @@ -245,10 +286,12 @@ static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t size) if (dst_crc32 == src_crc32) ret = true; - dma_free_coherent(dev, size, dst_addr, dst_phys_addr); + dma_free_coherent(dev, size + alignment, orig_dst_addr, + orig_dst_phys_addr); -err_src_addr: - dma_free_coherent(dev, size, src_addr, src_phys_addr); +err_orig_src_addr: + dma_free_coherent(dev, size + alignment, orig_src_addr, + orig_src_phys_addr); err: return ret; @@ -262,15 +305,29 @@ static bool pci_endpoint_test_write(struct pci_endpoint_test *test, size_t size) dma_addr_t phys_addr; struct pci_dev *pdev = test->pdev; struct device *dev = &pdev->dev; + void *orig_addr; + dma_addr_t orig_phys_addr; + size_t offset; + size_t alignment = test->alignment; u32 crc32; - addr = dma_alloc_coherent(dev, size, &phys_addr, GFP_KERNEL); - if (!addr) { + orig_addr = dma_alloc_coherent(dev, size + alignment, &orig_phys_addr, + GFP_KERNEL); + if (!orig_addr) { dev_err(dev, "failed to allocate address\n"); ret = false; goto err; } + if (alignment && !IS_ALIGNED(orig_phys_addr, alignment)) { + phys_addr = PTR_ALIGN(orig_phys_addr, alignment); + offset = phys_addr - orig_phys_addr; + addr = orig_addr + offset; + } else { + phys_addr = orig_phys_addr; + addr = orig_addr; + } + get_random_bytes(addr, size); crc32 = crc32_le(~0, addr, size); @@ -293,7 +350,7 @@ static bool pci_endpoint_test_write(struct pci_endpoint_test *test, size_t size) if (reg & STATUS_READ_SUCCESS) ret = true; - dma_free_coherent(dev, size, addr, phys_addr); + dma_free_coherent(dev, size + alignment, orig_addr, orig_phys_addr); err: return ret; @@ -306,15 +363,29 @@ static bool pci_endpoint_test_read(struct pci_endpoint_test *test, size_t size) dma_addr_t phys_addr; struct pci_dev *pdev = test->pdev; struct device *dev = &pdev->dev; + void *orig_addr; + dma_addr_t orig_phys_addr; + size_t offset; + size_t alignment = test->alignment; u32 crc32; - addr = dma_alloc_coherent(dev, size, &phys_addr, GFP_KERNEL); - if (!addr) { + orig_addr = dma_alloc_coherent(dev, size + alignment, &orig_phys_addr, + GFP_KERNEL); + if (!orig_addr) { dev_err(dev, "failed to allocate destination address\n"); ret = false; goto err; } + if (alignment && !IS_ALIGNED(orig_phys_addr, alignment)) { + phys_addr = PTR_ALIGN(orig_phys_addr, alignment); + offset = phys_addr - orig_phys_addr; + addr = orig_addr + offset; + } else { + phys_addr = orig_phys_addr; + addr = orig_addr; + } + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_LOWER_DST_ADDR, lower_32_bits(phys_addr)); pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_UPPER_DST_ADDR, @@ -331,7 +402,7 @@ static bool pci_endpoint_test_read(struct pci_endpoint_test *test, size_t size) if (crc32 == pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_CHECKSUM)) ret = true; - dma_free_coherent(dev, size, addr, phys_addr); + dma_free_coherent(dev, size + alignment, orig_addr, orig_phys_addr); err: return ret; } @@ -383,13 +454,15 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, { int i; int err; - int irq; + int irq = 0; int id; char name[20]; enum pci_barno bar; void __iomem *base; struct device *dev = &pdev->dev; struct pci_endpoint_test *test; + struct pci_endpoint_test_data *data; + enum pci_barno test_reg_bar = BAR_0; struct miscdevice *misc_device; if (pci_is_bridge(pdev)) @@ -399,7 +472,17 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, if (!test) return -ENOMEM; + test->test_reg_bar = 0; + test->alignment = 0; test->pdev = pdev; + + data = (struct pci_endpoint_test_data *)ent->driver_data; + if (data) { + test_reg_bar = data->test_reg_bar; + test->alignment = data->alignment; + no_msi = data->no_msi; + } + init_completion(&test->irq_raised); mutex_init(&test->mutex); @@ -417,9 +500,11 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, pci_set_master(pdev); - irq = pci_alloc_irq_vectors(pdev, 1, 32, PCI_IRQ_MSI); - if (irq < 0) - dev_err(dev, "failed to get MSI interrupts\n"); + if (!no_msi) { + irq = pci_alloc_irq_vectors(pdev, 1, 32, PCI_IRQ_MSI); + if (irq < 0) + dev_err(dev, "failed to get MSI interrupts\n"); + } err = devm_request_irq(dev, pdev->irq, pci_endpoint_test_irqhandler, IRQF_SHARED, DRV_MODULE_NAME, test); @@ -441,14 +526,15 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, base = pci_ioremap_bar(pdev, bar); if (!base) { dev_err(dev, "failed to read BAR%d\n", bar); - WARN_ON(bar == BAR_0); + WARN_ON(bar == test_reg_bar); } test->bar[bar] = base; } - test->base = test->bar[0]; + test->base = test->bar[test_reg_bar]; if (!test->base) { - dev_err(dev, "Cannot perform PCI test without BAR0\n"); + dev_err(dev, "Cannot perform PCI test without BAR%d\n", + test_reg_bar); goto err_iounmap; } diff --git a/drivers/pci/dwc/Kconfig b/drivers/pci/dwc/Kconfig index d275aadc47ee..22ec82fcdea2 100644 --- a/drivers/pci/dwc/Kconfig +++ b/drivers/pci/dwc/Kconfig @@ -25,7 +25,7 @@ config PCI_DRA7XX work either as EP or RC. In order to enable host-specific features PCI_DRA7XX_HOST must be selected and in order to enable device- specific features PCI_DRA7XX_EP must be selected. This uses - the Designware core. + the DesignWare core. if PCI_DRA7XX @@ -97,8 +97,8 @@ config PCI_KEYSTONE select PCIE_DW_HOST help Say Y here if you want to enable PCI controller support on Keystone - SoCs. The PCI controller on Keystone is based on Designware hardware - and therefore the driver re-uses the Designware core functions to + SoCs. The PCI controller on Keystone is based on DesignWare hardware + and therefore the driver re-uses the DesignWare core functions to implement the driver. config PCI_LAYERSCAPE @@ -132,7 +132,7 @@ config PCIE_QCOM select PCIE_DW_HOST help Say Y here to enable PCIe controller support on Qualcomm SoCs. The - PCIe controller uses the Designware core plus Qualcomm-specific + PCIe controller uses the DesignWare core plus Qualcomm-specific hardware wrappers. config PCIE_ARMADA_8K @@ -145,8 +145,8 @@ config PCIE_ARMADA_8K help Say Y here if you want to enable PCIe controller support on Armada-8K SoCs. The PCIe controller on Armada-8K is based on - Designware hardware and therefore the driver re-uses the - Designware core functions to implement the driver. + DesignWare hardware and therefore the driver re-uses the + DesignWare core functions to implement the driver. config PCIE_ARTPEC6 bool "Axis ARTPEC-6 PCIe controller" diff --git a/drivers/pci/dwc/pci-dra7xx.c b/drivers/pci/dwc/pci-dra7xx.c index f2fc5f47064e..34427a6a15af 100644 --- a/drivers/pci/dwc/pci-dra7xx.c +++ b/drivers/pci/dwc/pci-dra7xx.c @@ -195,7 +195,7 @@ static void dra7xx_pcie_enable_interrupts(struct dra7xx_pcie *dra7xx) dra7xx_pcie_enable_msi_interrupts(dra7xx); } -static void dra7xx_pcie_host_init(struct pcie_port *pp) +static int dra7xx_pcie_host_init(struct pcie_port *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pci); @@ -206,6 +206,8 @@ static void dra7xx_pcie_host_init(struct pcie_port *pp) dw_pcie_wait_for_link(pci); dw_pcie_msi_init(pp); dra7xx_pcie_enable_interrupts(dra7xx); + + return 0; } static const struct dw_pcie_host_ops dra7xx_pcie_host_ops = { @@ -238,7 +240,7 @@ static int dra7xx_pcie_init_irq_domain(struct pcie_port *pp) return -ENODEV; } - dra7xx->irq_domain = irq_domain_add_linear(pcie_intc_node, 4, + dra7xx->irq_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX, &intx_domain_ops, pp); if (!dra7xx->irq_domain) { dev_err(dev, "Failed to get a INTx IRQ domain\n"); @@ -275,7 +277,6 @@ static irqreturn_t dra7xx_pcie_msi_irq_handler(int irq, void *arg) return IRQ_HANDLED; } - static irqreturn_t dra7xx_pcie_irq_handler(int irq, void *arg) { struct dra7xx_pcie *dra7xx = arg; @@ -335,10 +336,23 @@ static irqreturn_t dra7xx_pcie_irq_handler(int irq, void *arg) return IRQ_HANDLED; } +static void dw_pcie_ep_reset_bar(struct dw_pcie *pci, enum pci_barno bar) +{ + u32 reg; + + reg = PCI_BASE_ADDRESS_0 + (4 * bar); + dw_pcie_writel_dbi2(pci, reg, 0x0); + dw_pcie_writel_dbi(pci, reg, 0x0); +} + static void dra7xx_pcie_ep_init(struct dw_pcie_ep *ep) { struct dw_pcie *pci = to_dw_pcie_from_ep(ep); struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pci); + enum pci_barno bar; + + for (bar = BAR_0; bar <= BAR_5; bar++) + dw_pcie_ep_reset_bar(pci, bar); dra7xx_pcie_enable_wrapper_interrupts(dra7xx); } @@ -435,7 +449,7 @@ static int __init dra7xx_add_pcie_port(struct dra7xx_pcie *dra7xx, pp->irq = platform_get_irq(pdev, 1); if (pp->irq < 0) { dev_err(dev, "missing IRQ resource\n"); - return -EINVAL; + return pp->irq; } ret = devm_request_irq(dev, pp->irq, dra7xx_pcie_msi_irq_handler, @@ -616,8 +630,8 @@ static int __init dra7xx_pcie_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) { - dev_err(dev, "missing IRQ resource\n"); - return -EINVAL; + dev_err(dev, "missing IRQ resource: %d\n", irq); + return irq; } res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ti_conf"); diff --git a/drivers/pci/dwc/pci-exynos.c b/drivers/pci/dwc/pci-exynos.c index c78c06552590..5596fdedbb94 100644 --- a/drivers/pci/dwc/pci-exynos.c +++ b/drivers/pci/dwc/pci-exynos.c @@ -581,13 +581,15 @@ static int exynos_pcie_link_up(struct dw_pcie *pci) return 0; } -static void exynos_pcie_host_init(struct pcie_port *pp) +static int exynos_pcie_host_init(struct pcie_port *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); struct exynos_pcie *ep = to_exynos_pcie(pci); exynos_pcie_establish_link(ep); exynos_pcie_enable_interrupts(ep); + + return 0; } static const struct dw_pcie_host_ops exynos_pcie_host_ops = { @@ -605,9 +607,9 @@ static int __init exynos_add_pcie_port(struct exynos_pcie *ep, int ret; pp->irq = platform_get_irq(pdev, 1); - if (!pp->irq) { + if (pp->irq < 0) { dev_err(dev, "failed to get irq\n"); - return -ENODEV; + return pp->irq; } ret = devm_request_irq(dev, pp->irq, exynos_pcie_irq_handler, IRQF_SHARED, "exynos-pcie", ep); @@ -618,9 +620,9 @@ static int __init exynos_add_pcie_port(struct exynos_pcie *ep, if (IS_ENABLED(CONFIG_PCI_MSI)) { pp->msi_irq = platform_get_irq(pdev, 0); - if (!pp->msi_irq) { + if (pp->msi_irq < 0) { dev_err(dev, "failed to get msi irq\n"); - return -ENODEV; + return pp->msi_irq; } ret = devm_request_irq(dev, pp->msi_irq, diff --git a/drivers/pci/dwc/pci-imx6.c b/drivers/pci/dwc/pci-imx6.c index bf5c3616e344..b73483534a5b 100644 --- a/drivers/pci/dwc/pci-imx6.c +++ b/drivers/pci/dwc/pci-imx6.c @@ -636,7 +636,7 @@ err_reset_phy: return ret; } -static void imx6_pcie_host_init(struct pcie_port *pp) +static int imx6_pcie_host_init(struct pcie_port *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); struct imx6_pcie *imx6_pcie = to_imx6_pcie(pci); @@ -649,6 +649,8 @@ static void imx6_pcie_host_init(struct pcie_port *pp) if (IS_ENABLED(CONFIG_PCI_MSI)) dw_pcie_msi_init(pp); + + return 0; } static int imx6_pcie_link_up(struct dw_pcie *pci) @@ -778,14 +780,15 @@ static int imx6_pcie_probe(struct platform_device *pdev) } break; case IMX7D: - imx6_pcie->pciephy_reset = devm_reset_control_get(dev, - "pciephy"); + imx6_pcie->pciephy_reset = devm_reset_control_get_exclusive(dev, + "pciephy"); if (IS_ERR(imx6_pcie->pciephy_reset)) { dev_err(dev, "Failed to get PCIEPHY reset control\n"); return PTR_ERR(imx6_pcie->pciephy_reset); } - imx6_pcie->apps_reset = devm_reset_control_get(dev, "apps"); + imx6_pcie->apps_reset = devm_reset_control_get_exclusive(dev, + "apps"); if (IS_ERR(imx6_pcie->apps_reset)) { dev_err(dev, "Failed to get PCIE APPS reset control\n"); return PTR_ERR(imx6_pcie->apps_reset); diff --git a/drivers/pci/dwc/pci-keystone-dw.c b/drivers/pci/dwc/pci-keystone-dw.c index 8bc626e640c8..2fb20b887d2a 100644 --- a/drivers/pci/dwc/pci-keystone-dw.c +++ b/drivers/pci/dwc/pci-keystone-dw.c @@ -1,5 +1,5 @@ /* - * Designware application register space functions for Keystone PCI controller + * DesignWare application register space functions for Keystone PCI controller * * Copyright (C) 2013-2014 Texas Instruments., Ltd. * http://www.ti.com @@ -168,16 +168,12 @@ void ks_dw_pcie_msi_clear_irq(struct pcie_port *pp, int irq) static void ks_dw_pcie_msi_irq_mask(struct irq_data *d) { - struct keystone_pcie *ks_pcie; struct msi_desc *msi; struct pcie_port *pp; - struct dw_pcie *pci; u32 offset; msi = irq_data_get_msi_desc(d); pp = (struct pcie_port *) msi_desc_to_pci_sysdata(msi); - pci = to_dw_pcie_from_pp(pp); - ks_pcie = to_keystone_pcie(pci); offset = d->irq - irq_linear_revmap(pp->irq_domain, 0); /* Mask the end point if PVM implemented */ @@ -191,16 +187,12 @@ static void ks_dw_pcie_msi_irq_mask(struct irq_data *d) static void ks_dw_pcie_msi_irq_unmask(struct irq_data *d) { - struct keystone_pcie *ks_pcie; struct msi_desc *msi; struct pcie_port *pp; - struct dw_pcie *pci; u32 offset; msi = irq_data_get_msi_desc(d); pp = (struct pcie_port *) msi_desc_to_pci_sysdata(msi); - pci = to_dw_pcie_from_pp(pp); - ks_pcie = to_keystone_pcie(pci); offset = d->irq - irq_linear_revmap(pp->irq_domain, 0); /* Mask the end point if PVM implemented */ @@ -259,7 +251,7 @@ void ks_dw_pcie_enable_legacy_irqs(struct keystone_pcie *ks_pcie) { int i; - for (i = 0; i < MAX_LEGACY_IRQS; i++) + for (i = 0; i < PCI_NUM_INTX; i++) ks_dw_app_writel(ks_pcie, IRQ_ENABLE_SET + (i << 4), 0x1); } @@ -565,7 +557,7 @@ int __init ks_dw_pcie_host_init(struct keystone_pcie *ks_pcie, /* Create legacy IRQ domain */ ks_pcie->legacy_irq_domain = irq_domain_add_linear(ks_pcie->legacy_intc_np, - MAX_LEGACY_IRQS, + PCI_NUM_INTX, &ks_dw_pcie_legacy_irq_domain_ops, NULL); if (!ks_pcie->legacy_irq_domain) { diff --git a/drivers/pci/dwc/pci-keystone.c b/drivers/pci/dwc/pci-keystone.c index 4783cec1f78d..5bee3af47588 100644 --- a/drivers/pci/dwc/pci-keystone.c +++ b/drivers/pci/dwc/pci-keystone.c @@ -32,10 +32,6 @@ #define DRIVER_NAME "keystone-pcie" -/* driver specific constants */ -#define MAX_MSI_HOST_IRQS 8 -#define MAX_LEGACY_HOST_IRQS 4 - /* DEV_STAT_CTRL */ #define PCIE_CAP_BASE 0x70 @@ -173,7 +169,7 @@ static int ks_pcie_get_irq_controller_info(struct keystone_pcie *ks_pcie, if (legacy) { np_temp = &ks_pcie->legacy_intc_np; - max_host_irqs = MAX_LEGACY_HOST_IRQS; + max_host_irqs = PCI_NUM_INTX; host_irqs = &ks_pcie->legacy_host_irqs[0]; } else { np_temp = &ks_pcie->msi_intc_np; @@ -261,7 +257,7 @@ static int keystone_pcie_fault(unsigned long addr, unsigned int fsr, return 0; } -static void __init ks_pcie_host_init(struct pcie_port *pp) +static int __init ks_pcie_host_init(struct pcie_port *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); struct keystone_pcie *ks_pcie = to_keystone_pcie(pci); @@ -289,6 +285,8 @@ static void __init ks_pcie_host_init(struct pcie_port *pp) */ hook_fault_code(17, keystone_pcie_fault, SIGBUS, 0, "Asynchronous external abort"); + + return 0; } static const struct dw_pcie_host_ops keystone_pcie_host_ops = { diff --git a/drivers/pci/dwc/pci-keystone.h b/drivers/pci/dwc/pci-keystone.h index 74c5825882df..30b7bc2ac380 100644 --- a/drivers/pci/dwc/pci-keystone.h +++ b/drivers/pci/dwc/pci-keystone.h @@ -12,9 +12,7 @@ * published by the Free Software Foundation. */ -#define MAX_LEGACY_IRQS 4 #define MAX_MSI_HOST_IRQS 8 -#define MAX_LEGACY_HOST_IRQS 4 struct keystone_pcie { struct dw_pcie *pci; @@ -22,7 +20,7 @@ struct keystone_pcie { /* PCI Device ID */ u32 device_id; int num_legacy_host_irqs; - int legacy_host_irqs[MAX_LEGACY_HOST_IRQS]; + int legacy_host_irqs[PCI_NUM_INTX]; struct device_node *legacy_intc_np; int num_msi_host_irqs; diff --git a/drivers/pci/dwc/pci-layerscape.c b/drivers/pci/dwc/pci-layerscape.c index fd861289ad8b..87fa486bee2c 100644 --- a/drivers/pci/dwc/pci-layerscape.c +++ b/drivers/pci/dwc/pci-layerscape.c @@ -33,7 +33,8 @@ /* PEX Internal Configuration Registers */ #define PCIE_STRFMR1 0x71c /* Symbol Timer & Filter Mask Register1 */ -#define PCIE_DBI_RO_WR_EN 0x8bc /* DBI Read-Only Write Enable Register */ + +#define PCIE_IATU_NUM 6 struct ls_pcie_drvdata { u32 lut_offset; @@ -72,14 +73,6 @@ static void ls_pcie_clear_multifunction(struct ls_pcie *pcie) iowrite8(PCI_HEADER_TYPE_BRIDGE, pci->dbi_base + PCI_HEADER_TYPE); } -/* Fix class value */ -static void ls_pcie_fix_class(struct ls_pcie *pcie) -{ - struct dw_pcie *pci = pcie->pci; - - iowrite16(PCI_CLASS_BRIDGE_PCI, pci->dbi_base + PCI_CLASS_DEVICE); -} - /* Drop MSG TLP except for Vendor MSG */ static void ls_pcie_drop_msg_tlp(struct ls_pcie *pcie) { @@ -91,6 +84,14 @@ static void ls_pcie_drop_msg_tlp(struct ls_pcie *pcie) iowrite32(val, pci->dbi_base + PCIE_STRFMR1); } +static void ls_pcie_disable_outbound_atus(struct ls_pcie *pcie) +{ + int i; + + for (i = 0; i < PCIE_IATU_NUM; i++) + dw_pcie_disable_atu(pcie->pci, DW_PCIE_REGION_OUTBOUND, i); +} + static int ls1021_pcie_link_up(struct dw_pcie *pci) { u32 state; @@ -108,33 +109,6 @@ static int ls1021_pcie_link_up(struct dw_pcie *pci) return 1; } -static void ls1021_pcie_host_init(struct pcie_port *pp) -{ - struct dw_pcie *pci = to_dw_pcie_from_pp(pp); - struct ls_pcie *pcie = to_ls_pcie(pci); - struct device *dev = pci->dev; - u32 index[2]; - - pcie->scfg = syscon_regmap_lookup_by_phandle(dev->of_node, - "fsl,pcie-scfg"); - if (IS_ERR(pcie->scfg)) { - dev_err(dev, "No syscfg phandle specified\n"); - pcie->scfg = NULL; - return; - } - - if (of_property_read_u32_array(dev->of_node, - "fsl,pcie-scfg", index, 2)) { - pcie->scfg = NULL; - return; - } - pcie->index = index[1]; - - dw_pcie_setup_rc(pp); - - ls_pcie_drop_msg_tlp(pcie); -} - static int ls_pcie_link_up(struct dw_pcie *pci) { struct ls_pcie *pcie = to_ls_pcie(pci); @@ -150,16 +124,54 @@ static int ls_pcie_link_up(struct dw_pcie *pci) return 1; } -static void ls_pcie_host_init(struct pcie_port *pp) +static int ls_pcie_host_init(struct pcie_port *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); struct ls_pcie *pcie = to_ls_pcie(pci); - iowrite32(1, pci->dbi_base + PCIE_DBI_RO_WR_EN); - ls_pcie_fix_class(pcie); + /* + * Disable outbound windows configured by the bootloader to avoid + * one transaction hitting multiple outbound windows. + * dw_pcie_setup_rc() will reconfigure the outbound windows. + */ + ls_pcie_disable_outbound_atus(pcie); + + dw_pcie_dbi_ro_wr_en(pci); ls_pcie_clear_multifunction(pcie); + dw_pcie_dbi_ro_wr_dis(pci); + ls_pcie_drop_msg_tlp(pcie); - iowrite32(0, pci->dbi_base + PCIE_DBI_RO_WR_EN); + + dw_pcie_setup_rc(pp); + + return 0; +} + +static int ls1021_pcie_host_init(struct pcie_port *pp) +{ + struct dw_pcie *pci = to_dw_pcie_from_pp(pp); + struct ls_pcie *pcie = to_ls_pcie(pci); + struct device *dev = pci->dev; + u32 index[2]; + int ret; + + pcie->scfg = syscon_regmap_lookup_by_phandle(dev->of_node, + "fsl,pcie-scfg"); + if (IS_ERR(pcie->scfg)) { + ret = PTR_ERR(pcie->scfg); + dev_err(dev, "No syscfg phandle specified\n"); + pcie->scfg = NULL; + return ret; + } + + if (of_property_read_u32_array(dev->of_node, + "fsl,pcie-scfg", index, 2)) { + pcie->scfg = NULL; + return -EINVAL; + } + pcie->index = index[1]; + + return ls_pcie_host_init(pp); } static int ls_pcie_msi_host_init(struct pcie_port *pp, @@ -232,12 +244,22 @@ static struct ls_pcie_drvdata ls2080_drvdata = { .dw_pcie_ops = &dw_ls_pcie_ops, }; +static struct ls_pcie_drvdata ls2088_drvdata = { + .lut_offset = 0x80000, + .ltssm_shift = 0, + .lut_dbg = 0x407fc, + .ops = &ls_pcie_host_ops, + .dw_pcie_ops = &dw_ls_pcie_ops, +}; + static const struct of_device_id ls_pcie_of_match[] = { { .compatible = "fsl,ls1021a-pcie", .data = &ls1021_drvdata }, { .compatible = "fsl,ls1043a-pcie", .data = &ls1043_drvdata }, { .compatible = "fsl,ls1046a-pcie", .data = &ls1046_drvdata }, { .compatible = "fsl,ls2080a-pcie", .data = &ls2080_drvdata }, { .compatible = "fsl,ls2085a-pcie", .data = &ls2080_drvdata }, + { .compatible = "fsl,ls2088a-pcie", .data = &ls2088_drvdata }, + { .compatible = "fsl,ls1088a-pcie", .data = &ls2088_drvdata }, { }, }; diff --git a/drivers/pci/dwc/pcie-armada8k.c b/drivers/pci/dwc/pcie-armada8k.c index ea8f34af6a85..370d057c0046 100644 --- a/drivers/pci/dwc/pcie-armada8k.c +++ b/drivers/pci/dwc/pcie-armada8k.c @@ -134,13 +134,15 @@ static void armada8k_pcie_establish_link(struct armada8k_pcie *pcie) dev_err(pci->dev, "Link not up after reconfiguration\n"); } -static void armada8k_pcie_host_init(struct pcie_port *pp) +static int armada8k_pcie_host_init(struct pcie_port *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); struct armada8k_pcie *pcie = to_armada8k_pcie(pci); dw_pcie_setup_rc(pp); armada8k_pcie_establish_link(pcie); + + return 0; } static irqreturn_t armada8k_pcie_irq_handler(int irq, void *arg) @@ -176,9 +178,9 @@ static int armada8k_add_pcie_port(struct armada8k_pcie *pcie, pp->ops = &armada8k_pcie_host_ops; pp->irq = platform_get_irq(pdev, 0); - if (!pp->irq) { + if (pp->irq < 0) { dev_err(dev, "failed to get irq for port\n"); - return -ENODEV; + return pp->irq; } ret = devm_request_irq(dev, pp->irq, armada8k_pcie_irq_handler, @@ -226,7 +228,9 @@ static int armada8k_pcie_probe(struct platform_device *pdev) if (IS_ERR(pcie->clk)) return PTR_ERR(pcie->clk); - clk_prepare_enable(pcie->clk); + ret = clk_prepare_enable(pcie->clk); + if (ret) + return ret; /* Get the dw-pcie unit configuration/control registers base. */ base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ctrl"); diff --git a/drivers/pci/dwc/pcie-artpec6.c b/drivers/pci/dwc/pcie-artpec6.c index 01c6f7823672..6653619db6a1 100644 --- a/drivers/pci/dwc/pcie-artpec6.c +++ b/drivers/pci/dwc/pcie-artpec6.c @@ -141,12 +141,6 @@ static int artpec6_pcie_establish_link(struct artpec6_pcie *artpec6_pcie) artpec6_pcie_writel(artpec6_pcie, PCIECFG, val); usleep_range(100, 200); - /* - * Enable writing to config regs. This is required as the Synopsys - * driver changes the class code. That register needs DBI write enable. - */ - dw_pcie_writel_dbi(pci, MISC_CONTROL_1_OFF, DBI_RO_WR_EN); - /* setup root complex */ dw_pcie_setup_rc(pp); @@ -175,13 +169,15 @@ static void artpec6_pcie_enable_interrupts(struct artpec6_pcie *artpec6_pcie) dw_pcie_msi_init(pp); } -static void artpec6_pcie_host_init(struct pcie_port *pp) +static int artpec6_pcie_host_init(struct pcie_port *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); struct artpec6_pcie *artpec6_pcie = to_artpec6_pcie(pci); artpec6_pcie_establish_link(artpec6_pcie); artpec6_pcie_enable_interrupts(artpec6_pcie); + + return 0; } static const struct dw_pcie_host_ops artpec6_pcie_host_ops = { @@ -207,9 +203,9 @@ static int artpec6_add_pcie_port(struct artpec6_pcie *artpec6_pcie, if (IS_ENABLED(CONFIG_PCI_MSI)) { pp->msi_irq = platform_get_irq_byname(pdev, "msi"); - if (pp->msi_irq <= 0) { + if (pp->msi_irq < 0) { dev_err(dev, "failed to get MSI irq\n"); - return -ENODEV; + return pp->msi_irq; } ret = devm_request_irq(dev, pp->msi_irq, diff --git a/drivers/pci/dwc/pcie-designware-ep.c b/drivers/pci/dwc/pcie-designware-ep.c index 398406393f37..d53d5f168363 100644 --- a/drivers/pci/dwc/pcie-designware-ep.c +++ b/drivers/pci/dwc/pcie-designware-ep.c @@ -1,5 +1,5 @@ /** - * Synopsys Designware PCIe Endpoint controller driver + * Synopsys DesignWare PCIe Endpoint controller driver * * Copyright (C) 2017 Texas Instruments * Author: Kishon Vijay Abraham I <kishon@ti.com> @@ -283,7 +283,6 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep) { int ret; void *addr; - enum pci_barno bar; struct pci_epc *epc; struct dw_pcie *pci = to_dw_pcie_from_ep(ep); struct device *dev = pci->dev; @@ -312,9 +311,6 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep) return -ENOMEM; ep->outbound_addr = addr; - for (bar = BAR_0; bar <= BAR_5; bar++) - dw_pcie_ep_reset_bar(pci, bar); - if (ep->ops->ep_init) ep->ops->ep_init(ep); @@ -328,7 +324,8 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep) if (ret < 0) epc->max_functions = 1; - ret = pci_epc_mem_init(epc, ep->phys_base, ep->addr_size); + ret = __pci_epc_mem_init(epc, ep->phys_base, ep->addr_size, + ep->page_size); if (ret < 0) { dev_err(dev, "Failed to initialize address space\n"); return ret; diff --git a/drivers/pci/dwc/pcie-designware-host.c b/drivers/pci/dwc/pcie-designware-host.c index d29c020da082..81e2157a7cfb 100644 --- a/drivers/pci/dwc/pcie-designware-host.c +++ b/drivers/pci/dwc/pcie-designware-host.c @@ -1,5 +1,5 @@ /* - * Synopsys Designware PCIe host controller driver + * Synopsys DesignWare PCIe host controller driver * * Copyright (C) 2013 Samsung Electronics Co., Ltd. * http://www.samsung.com @@ -71,9 +71,9 @@ irqreturn_t dw_handle_msi_irq(struct pcie_port *pp) while ((pos = find_next_bit((unsigned long *) &val, 32, pos)) != 32) { irq = irq_find_mapping(pp->irq_domain, i * 32 + pos); + generic_handle_irq(irq); dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_STATUS + i * 12, 4, 1 << pos); - generic_handle_irq(irq); pos++; } } @@ -401,8 +401,11 @@ int dw_pcie_host_init(struct pcie_port *pp) } } - if (pp->ops->host_init) - pp->ops->host_init(pp); + if (pp->ops->host_init) { + ret = pp->ops->host_init(pp); + if (ret) + goto error; + } pp->root_bus_nr = pp->busn->start; @@ -594,10 +597,12 @@ void dw_pcie_setup_rc(struct pcie_port *pp) dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_1, 0x00000000); /* setup interrupt pins */ + dw_pcie_dbi_ro_wr_en(pci); val = dw_pcie_readl_dbi(pci, PCI_INTERRUPT_LINE); val &= 0xffff00ff; val |= 0x00000100; dw_pcie_writel_dbi(pci, PCI_INTERRUPT_LINE, val); + dw_pcie_dbi_ro_wr_dis(pci); /* setup bus numbers */ val = dw_pcie_readl_dbi(pci, PCI_PRIMARY_BUS); @@ -634,8 +639,12 @@ void dw_pcie_setup_rc(struct pcie_port *pp) dw_pcie_wr_own_conf(pp, PCI_BASE_ADDRESS_0, 4, 0); + /* Enable write permission for the DBI read-only register */ + dw_pcie_dbi_ro_wr_en(pci); /* program correct class for RC */ dw_pcie_wr_own_conf(pp, PCI_CLASS_DEVICE, 2, PCI_CLASS_BRIDGE_PCI); + /* Better disable write permission right after the update */ + dw_pcie_dbi_ro_wr_dis(pci); dw_pcie_rd_own_conf(pp, PCIE_LINK_WIDTH_SPEED_CONTROL, 4, &val); val |= PORT_LOGIC_SPEED_CHANGE; diff --git a/drivers/pci/dwc/pcie-designware-plat.c b/drivers/pci/dwc/pcie-designware-plat.c index 091b4e7ad059..168e2380f493 100644 --- a/drivers/pci/dwc/pcie-designware-plat.c +++ b/drivers/pci/dwc/pcie-designware-plat.c @@ -35,7 +35,7 @@ static irqreturn_t dw_plat_pcie_msi_irq_handler(int irq, void *arg) return dw_handle_msi_irq(pp); } -static void dw_plat_pcie_host_init(struct pcie_port *pp) +static int dw_plat_pcie_host_init(struct pcie_port *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); @@ -44,6 +44,8 @@ static void dw_plat_pcie_host_init(struct pcie_port *pp) if (IS_ENABLED(CONFIG_PCI_MSI)) dw_pcie_msi_init(pp); + + return 0; } static const struct dw_pcie_host_ops dw_plat_pcie_host_ops = { diff --git a/drivers/pci/dwc/pcie-designware.c b/drivers/pci/dwc/pcie-designware.c index 0e03af279259..88abdddee2ad 100644 --- a/drivers/pci/dwc/pcie-designware.c +++ b/drivers/pci/dwc/pcie-designware.c @@ -1,5 +1,5 @@ /* - * Synopsys Designware PCIe host controller driver + * Synopsys DesignWare PCIe host controller driver * * Copyright (C) 2013 Samsung Electronics Co., Ltd. * http://www.samsung.com @@ -107,8 +107,9 @@ static void dw_pcie_writel_ob_unroll(struct dw_pcie *pci, u32 index, u32 reg, dw_pcie_writel_dbi(pci, offset + reg, val); } -void dw_pcie_prog_outbound_atu_unroll(struct dw_pcie *pci, int index, int type, - u64 cpu_addr, u64 pci_addr, u32 size) +static void dw_pcie_prog_outbound_atu_unroll(struct dw_pcie *pci, int index, + int type, u64 cpu_addr, + u64 pci_addr, u32 size) { u32 retries, val; @@ -177,7 +178,7 @@ void dw_pcie_prog_outbound_atu(struct dw_pcie *pci, int index, int type, */ for (retries = 0; retries < LINK_WAIT_MAX_IATU_RETRIES; retries++) { val = dw_pcie_readl_dbi(pci, PCIE_ATU_CR2); - if (val == PCIE_ATU_ENABLE) + if (val & PCIE_ATU_ENABLE) return; usleep_range(LINK_WAIT_IATU_MIN, LINK_WAIT_IATU_MAX); @@ -200,8 +201,9 @@ static void dw_pcie_writel_ib_unroll(struct dw_pcie *pci, u32 index, u32 reg, dw_pcie_writel_dbi(pci, offset + reg, val); } -int dw_pcie_prog_inbound_atu_unroll(struct dw_pcie *pci, int index, int bar, - u64 cpu_addr, enum dw_pcie_as_type as_type) +static int dw_pcie_prog_inbound_atu_unroll(struct dw_pcie *pci, int index, + int bar, u64 cpu_addr, + enum dw_pcie_as_type as_type) { int type; u32 retries, val; diff --git a/drivers/pci/dwc/pcie-designware.h b/drivers/pci/dwc/pcie-designware.h index b4d2a89f8e58..e5d9d77b778e 100644 --- a/drivers/pci/dwc/pcie-designware.h +++ b/drivers/pci/dwc/pcie-designware.h @@ -1,5 +1,5 @@ /* - * Synopsys Designware PCIe host controller driver + * Synopsys DesignWare PCIe host controller driver * * Copyright (C) 2013 Samsung Electronics Co., Ltd. * http://www.samsung.com @@ -76,6 +76,9 @@ #define PCIE_ATU_FUNC(x) (((x) & 0x7) << 16) #define PCIE_ATU_UPPER_TARGET 0x91C +#define PCIE_MISC_CONTROL_1_OFF 0x8BC +#define PCIE_DBI_RO_WR_EN (0x1 << 0) + /* * iATU Unroll-specific register definitions * From 4.80 core version the address translation will be made by unroll @@ -134,7 +137,7 @@ struct dw_pcie_host_ops { unsigned int devfn, int where, int size, u32 *val); int (*wr_other_conf)(struct pcie_port *pp, struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val); - void (*host_init)(struct pcie_port *pp); + int (*host_init)(struct pcie_port *pp); void (*msi_set_irq)(struct pcie_port *pp, int irq); void (*msi_clear_irq)(struct pcie_port *pp, int irq); phys_addr_t (*get_msi_addr)(struct pcie_port *pp); @@ -186,6 +189,7 @@ struct dw_pcie_ep { struct dw_pcie_ep_ops *ops; phys_addr_t phys_base; size_t addr_size; + size_t page_size; u8 bar_to_atu[6]; phys_addr_t *outbound_addr; unsigned long ib_window_map; @@ -279,6 +283,28 @@ static inline u32 dw_pcie_readl_dbi2(struct dw_pcie *pci, u32 reg) return __dw_pcie_read_dbi(pci, pci->dbi_base2, reg, 0x4); } +static inline void dw_pcie_dbi_ro_wr_en(struct dw_pcie *pci) +{ + u32 reg; + u32 val; + + reg = PCIE_MISC_CONTROL_1_OFF; + val = dw_pcie_readl_dbi(pci, reg); + val |= PCIE_DBI_RO_WR_EN; + dw_pcie_writel_dbi(pci, reg, val); +} + +static inline void dw_pcie_dbi_ro_wr_dis(struct dw_pcie *pci) +{ + u32 reg; + u32 val; + + reg = PCIE_MISC_CONTROL_1_OFF; + val = dw_pcie_readl_dbi(pci, reg); + val &= ~PCIE_DBI_RO_WR_EN; + dw_pcie_writel_dbi(pci, reg, val); +} + #ifdef CONFIG_PCIE_DW_HOST irqreturn_t dw_handle_msi_irq(struct pcie_port *pp); void dw_pcie_msi_init(struct pcie_port *pp); diff --git a/drivers/pci/dwc/pcie-hisi.c b/drivers/pci/dwc/pcie-hisi.c index e51acee0ddf3..a20179169e06 100644 --- a/drivers/pci/dwc/pcie-hisi.c +++ b/drivers/pci/dwc/pcie-hisi.c @@ -223,7 +223,7 @@ static int hisi_pcie_link_up(struct dw_pcie *pci) return hisi_pcie->soc_ops->hisi_pcie_link_up(hisi_pcie); } -static struct dw_pcie_host_ops hisi_pcie_host_ops = { +static const struct dw_pcie_host_ops hisi_pcie_host_ops = { .rd_own_conf = hisi_pcie_cfg_read, .wr_own_conf = hisi_pcie_cfg_write, }; @@ -268,7 +268,6 @@ static int hisi_pcie_probe(struct platform_device *pdev) struct dw_pcie *pci; struct hisi_pcie *hisi_pcie; struct resource *reg; - struct device_driver *driver; int ret; hisi_pcie = devm_kzalloc(dev, sizeof(*hisi_pcie), GFP_KERNEL); @@ -282,8 +281,6 @@ static int hisi_pcie_probe(struct platform_device *pdev) pci->dev = dev; pci->ops = &dw_pcie_ops; - driver = dev->driver; - hisi_pcie->pci = pci; hisi_pcie->soc_ops = of_device_get_match_data(dev); diff --git a/drivers/pci/dwc/pcie-kirin.c b/drivers/pci/dwc/pcie-kirin.c index 33fddb9f6739..dc3033cf3c19 100644 --- a/drivers/pci/dwc/pcie-kirin.c +++ b/drivers/pci/dwc/pcie-kirin.c @@ -430,9 +430,11 @@ static int kirin_pcie_establish_link(struct pcie_port *pp) return 0; } -static void kirin_pcie_host_init(struct pcie_port *pp) +static int kirin_pcie_host_init(struct pcie_port *pp) { kirin_pcie_establish_link(pp); + + return 0; } static struct dw_pcie_ops kirin_dw_pcie_ops = { @@ -441,7 +443,7 @@ static struct dw_pcie_ops kirin_dw_pcie_ops = { .link_up = kirin_pcie_link_up, }; -static struct dw_pcie_host_ops kirin_pcie_host_ops = { +static const struct dw_pcie_host_ops kirin_pcie_host_ops = { .rd_own_conf = kirin_pcie_rd_own_conf, .wr_own_conf = kirin_pcie_wr_own_conf, .host_init = kirin_pcie_host_init, diff --git a/drivers/pci/dwc/pcie-qcom.c b/drivers/pci/dwc/pcie-qcom.c index 68c5f2ab5bc8..ce7ba5b7552a 100644 --- a/drivers/pci/dwc/pcie-qcom.c +++ b/drivers/pci/dwc/pcie-qcom.c @@ -37,6 +37,20 @@ #include "pcie-designware.h" #define PCIE20_PARF_SYS_CTRL 0x00 +#define MST_WAKEUP_EN BIT(13) +#define SLV_WAKEUP_EN BIT(12) +#define MSTR_ACLK_CGC_DIS BIT(10) +#define SLV_ACLK_CGC_DIS BIT(9) +#define CORE_CLK_CGC_DIS BIT(6) +#define AUX_PWR_DET BIT(4) +#define L23_CLK_RMV_DIS BIT(2) +#define L1_CLK_RMV_DIS BIT(1) + +#define PCIE20_COMMAND_STATUS 0x04 +#define CMD_BME_VAL 0x4 +#define PCIE20_DEVICE_CONTROL2_STATUS2 0x98 +#define PCIE_CAP_CPL_TIMEOUT_DISABLE 0x10 + #define PCIE20_PARF_PHY_CTRL 0x40 #define PCIE20_PARF_PHY_REFCLK 0x4C #define PCIE20_PARF_DBI_BASE_ADDR 0x168 @@ -58,10 +72,22 @@ #define CFG_BRIDGE_SB_INIT BIT(0) #define PCIE20_CAP 0x70 +#define PCIE20_CAP_LINK_CAPABILITIES (PCIE20_CAP + 0xC) +#define PCIE20_CAP_ACTIVE_STATE_LINK_PM_SUPPORT (BIT(10) | BIT(11)) +#define PCIE20_CAP_LINK_1 (PCIE20_CAP + 0x14) +#define PCIE_CAP_LINK1_VAL 0x2FD7F + +#define PCIE20_PARF_Q2A_FLUSH 0x1AC + +#define PCIE20_MISC_CONTROL_1_REG 0x8BC +#define DBI_RO_WR_EN 1 #define PERST_DELAY_US 1000 -struct qcom_pcie_resources_v0 { +#define PCIE20_v3_PARF_SLV_ADDR_SPACE_SIZE 0x358 +#define SLV_ADDR_SPACE_SZ 0x10000000 + +struct qcom_pcie_resources_2_1_0 { struct clk *iface_clk; struct clk *core_clk; struct clk *phy_clk; @@ -75,7 +101,7 @@ struct qcom_pcie_resources_v0 { struct regulator *vdda_refclk; }; -struct qcom_pcie_resources_v1 { +struct qcom_pcie_resources_1_0_0 { struct clk *iface; struct clk *aux; struct clk *master_bus; @@ -84,7 +110,7 @@ struct qcom_pcie_resources_v1 { struct regulator *vdda; }; -struct qcom_pcie_resources_v2 { +struct qcom_pcie_resources_2_3_2 { struct clk *aux_clk; struct clk *master_clk; struct clk *slave_clk; @@ -92,7 +118,7 @@ struct qcom_pcie_resources_v2 { struct clk *pipe_clk; }; -struct qcom_pcie_resources_v3 { +struct qcom_pcie_resources_2_4_0 { struct clk *aux_clk; struct clk *master_clk; struct clk *slave_clk; @@ -110,11 +136,21 @@ struct qcom_pcie_resources_v3 { struct reset_control *phy_ahb_reset; }; +struct qcom_pcie_resources_2_3_3 { + struct clk *iface; + struct clk *axi_m_clk; + struct clk *axi_s_clk; + struct clk *ahb_clk; + struct clk *aux_clk; + struct reset_control *rst[7]; +}; + union qcom_pcie_resources { - struct qcom_pcie_resources_v0 v0; - struct qcom_pcie_resources_v1 v1; - struct qcom_pcie_resources_v2 v2; - struct qcom_pcie_resources_v3 v3; + struct qcom_pcie_resources_1_0_0 v1_0_0; + struct qcom_pcie_resources_2_1_0 v2_1_0; + struct qcom_pcie_resources_2_3_2 v2_3_2; + struct qcom_pcie_resources_2_3_3 v2_3_3; + struct qcom_pcie_resources_2_4_0 v2_4_0; }; struct qcom_pcie; @@ -124,6 +160,7 @@ struct qcom_pcie_ops { int (*init)(struct qcom_pcie *pcie); int (*post_init)(struct qcom_pcie *pcie); void (*deinit)(struct qcom_pcie *pcie); + void (*post_deinit)(struct qcom_pcie *pcie); void (*ltssm_enable)(struct qcom_pcie *pcie); }; @@ -141,13 +178,13 @@ struct qcom_pcie { static void qcom_ep_reset_assert(struct qcom_pcie *pcie) { - gpiod_set_value(pcie->reset, 1); + gpiod_set_value_cansleep(pcie->reset, 1); usleep_range(PERST_DELAY_US, PERST_DELAY_US + 500); } static void qcom_ep_reset_deassert(struct qcom_pcie *pcie) { - gpiod_set_value(pcie->reset, 0); + gpiod_set_value_cansleep(pcie->reset, 0); usleep_range(PERST_DELAY_US, PERST_DELAY_US + 500); } @@ -172,7 +209,7 @@ static int qcom_pcie_establish_link(struct qcom_pcie *pcie) return dw_pcie_wait_for_link(pci); } -static void qcom_pcie_v0_v1_ltssm_enable(struct qcom_pcie *pcie) +static void qcom_pcie_2_1_0_ltssm_enable(struct qcom_pcie *pcie) { u32 val; @@ -182,9 +219,9 @@ static void qcom_pcie_v0_v1_ltssm_enable(struct qcom_pcie *pcie) writel(val, pcie->elbi + PCIE20_ELBI_SYS_CTRL); } -static int qcom_pcie_get_resources_v0(struct qcom_pcie *pcie) +static int qcom_pcie_get_resources_2_1_0(struct qcom_pcie *pcie) { - struct qcom_pcie_resources_v0 *res = &pcie->res.v0; + struct qcom_pcie_resources_2_1_0 *res = &pcie->res.v2_1_0; struct dw_pcie *pci = pcie->pci; struct device *dev = pci->dev; @@ -212,29 +249,29 @@ static int qcom_pcie_get_resources_v0(struct qcom_pcie *pcie) if (IS_ERR(res->phy_clk)) return PTR_ERR(res->phy_clk); - res->pci_reset = devm_reset_control_get(dev, "pci"); + res->pci_reset = devm_reset_control_get_exclusive(dev, "pci"); if (IS_ERR(res->pci_reset)) return PTR_ERR(res->pci_reset); - res->axi_reset = devm_reset_control_get(dev, "axi"); + res->axi_reset = devm_reset_control_get_exclusive(dev, "axi"); if (IS_ERR(res->axi_reset)) return PTR_ERR(res->axi_reset); - res->ahb_reset = devm_reset_control_get(dev, "ahb"); + res->ahb_reset = devm_reset_control_get_exclusive(dev, "ahb"); if (IS_ERR(res->ahb_reset)) return PTR_ERR(res->ahb_reset); - res->por_reset = devm_reset_control_get(dev, "por"); + res->por_reset = devm_reset_control_get_exclusive(dev, "por"); if (IS_ERR(res->por_reset)) return PTR_ERR(res->por_reset); - res->phy_reset = devm_reset_control_get(dev, "phy"); + res->phy_reset = devm_reset_control_get_exclusive(dev, "phy"); return PTR_ERR_OR_ZERO(res->phy_reset); } -static void qcom_pcie_deinit_v0(struct qcom_pcie *pcie) +static void qcom_pcie_deinit_2_1_0(struct qcom_pcie *pcie) { - struct qcom_pcie_resources_v0 *res = &pcie->res.v0; + struct qcom_pcie_resources_2_1_0 *res = &pcie->res.v2_1_0; reset_control_assert(res->pci_reset); reset_control_assert(res->axi_reset); @@ -249,9 +286,9 @@ static void qcom_pcie_deinit_v0(struct qcom_pcie *pcie) regulator_disable(res->vdda_refclk); } -static int qcom_pcie_init_v0(struct qcom_pcie *pcie) +static int qcom_pcie_init_2_1_0(struct qcom_pcie *pcie) { - struct qcom_pcie_resources_v0 *res = &pcie->res.v0; + struct qcom_pcie_resources_2_1_0 *res = &pcie->res.v2_1_0; struct dw_pcie *pci = pcie->pci; struct device *dev = pci->dev; u32 val; @@ -367,9 +404,9 @@ err_refclk: return ret; } -static int qcom_pcie_get_resources_v1(struct qcom_pcie *pcie) +static int qcom_pcie_get_resources_1_0_0(struct qcom_pcie *pcie) { - struct qcom_pcie_resources_v1 *res = &pcie->res.v1; + struct qcom_pcie_resources_1_0_0 *res = &pcie->res.v1_0_0; struct dw_pcie *pci = pcie->pci; struct device *dev = pci->dev; @@ -393,13 +430,13 @@ static int qcom_pcie_get_resources_v1(struct qcom_pcie *pcie) if (IS_ERR(res->slave_bus)) return PTR_ERR(res->slave_bus); - res->core = devm_reset_control_get(dev, "core"); + res->core = devm_reset_control_get_exclusive(dev, "core"); return PTR_ERR_OR_ZERO(res->core); } -static void qcom_pcie_deinit_v1(struct qcom_pcie *pcie) +static void qcom_pcie_deinit_1_0_0(struct qcom_pcie *pcie) { - struct qcom_pcie_resources_v1 *res = &pcie->res.v1; + struct qcom_pcie_resources_1_0_0 *res = &pcie->res.v1_0_0; reset_control_assert(res->core); clk_disable_unprepare(res->slave_bus); @@ -409,9 +446,9 @@ static void qcom_pcie_deinit_v1(struct qcom_pcie *pcie) regulator_disable(res->vdda); } -static int qcom_pcie_init_v1(struct qcom_pcie *pcie) +static int qcom_pcie_init_1_0_0(struct qcom_pcie *pcie) { - struct qcom_pcie_resources_v1 *res = &pcie->res.v1; + struct qcom_pcie_resources_1_0_0 *res = &pcie->res.v1_0_0; struct dw_pcie *pci = pcie->pci; struct device *dev = pci->dev; int ret; @@ -477,7 +514,7 @@ err_res: return ret; } -static void qcom_pcie_v2_ltssm_enable(struct qcom_pcie *pcie) +static void qcom_pcie_2_3_2_ltssm_enable(struct qcom_pcie *pcie) { u32 val; @@ -487,9 +524,9 @@ static void qcom_pcie_v2_ltssm_enable(struct qcom_pcie *pcie) writel(val, pcie->parf + PCIE20_PARF_LTSSM); } -static int qcom_pcie_get_resources_v2(struct qcom_pcie *pcie) +static int qcom_pcie_get_resources_2_3_2(struct qcom_pcie *pcie) { - struct qcom_pcie_resources_v2 *res = &pcie->res.v2; + struct qcom_pcie_resources_2_3_2 *res = &pcie->res.v2_3_2; struct dw_pcie *pci = pcie->pci; struct device *dev = pci->dev; @@ -513,20 +550,26 @@ static int qcom_pcie_get_resources_v2(struct qcom_pcie *pcie) return PTR_ERR_OR_ZERO(res->pipe_clk); } -static void qcom_pcie_deinit_v2(struct qcom_pcie *pcie) +static void qcom_pcie_deinit_2_3_2(struct qcom_pcie *pcie) { - struct qcom_pcie_resources_v2 *res = &pcie->res.v2; + struct qcom_pcie_resources_2_3_2 *res = &pcie->res.v2_3_2; - clk_disable_unprepare(res->pipe_clk); clk_disable_unprepare(res->slave_clk); clk_disable_unprepare(res->master_clk); clk_disable_unprepare(res->cfg_clk); clk_disable_unprepare(res->aux_clk); } -static int qcom_pcie_init_v2(struct qcom_pcie *pcie) +static void qcom_pcie_post_deinit_2_3_2(struct qcom_pcie *pcie) { - struct qcom_pcie_resources_v2 *res = &pcie->res.v2; + struct qcom_pcie_resources_2_3_2 *res = &pcie->res.v2_3_2; + + clk_disable_unprepare(res->pipe_clk); +} + +static int qcom_pcie_init_2_3_2(struct qcom_pcie *pcie) +{ + struct qcom_pcie_resources_2_3_2 *res = &pcie->res.v2_3_2; struct dw_pcie *pci = pcie->pci; struct device *dev = pci->dev; u32 val; @@ -589,9 +632,9 @@ err_cfg_clk: return ret; } -static int qcom_pcie_post_init_v2(struct qcom_pcie *pcie) +static int qcom_pcie_post_init_2_3_2(struct qcom_pcie *pcie) { - struct qcom_pcie_resources_v2 *res = &pcie->res.v2; + struct qcom_pcie_resources_2_3_2 *res = &pcie->res.v2_3_2; struct dw_pcie *pci = pcie->pci; struct device *dev = pci->dev; int ret; @@ -605,9 +648,9 @@ static int qcom_pcie_post_init_v2(struct qcom_pcie *pcie) return 0; } -static int qcom_pcie_get_resources_v3(struct qcom_pcie *pcie) +static int qcom_pcie_get_resources_2_4_0(struct qcom_pcie *pcie) { - struct qcom_pcie_resources_v3 *res = &pcie->res.v3; + struct qcom_pcie_resources_2_4_0 *res = &pcie->res.v2_4_0; struct dw_pcie *pci = pcie->pci; struct device *dev = pci->dev; @@ -623,60 +666,64 @@ static int qcom_pcie_get_resources_v3(struct qcom_pcie *pcie) if (IS_ERR(res->slave_clk)) return PTR_ERR(res->slave_clk); - res->axi_m_reset = devm_reset_control_get(dev, "axi_m"); + res->axi_m_reset = devm_reset_control_get_exclusive(dev, "axi_m"); if (IS_ERR(res->axi_m_reset)) return PTR_ERR(res->axi_m_reset); - res->axi_s_reset = devm_reset_control_get(dev, "axi_s"); + res->axi_s_reset = devm_reset_control_get_exclusive(dev, "axi_s"); if (IS_ERR(res->axi_s_reset)) return PTR_ERR(res->axi_s_reset); - res->pipe_reset = devm_reset_control_get(dev, "pipe"); + res->pipe_reset = devm_reset_control_get_exclusive(dev, "pipe"); if (IS_ERR(res->pipe_reset)) return PTR_ERR(res->pipe_reset); - res->axi_m_vmid_reset = devm_reset_control_get(dev, "axi_m_vmid"); + res->axi_m_vmid_reset = devm_reset_control_get_exclusive(dev, + "axi_m_vmid"); if (IS_ERR(res->axi_m_vmid_reset)) return PTR_ERR(res->axi_m_vmid_reset); - res->axi_s_xpu_reset = devm_reset_control_get(dev, "axi_s_xpu"); + res->axi_s_xpu_reset = devm_reset_control_get_exclusive(dev, + "axi_s_xpu"); if (IS_ERR(res->axi_s_xpu_reset)) return PTR_ERR(res->axi_s_xpu_reset); - res->parf_reset = devm_reset_control_get(dev, "parf"); + res->parf_reset = devm_reset_control_get_exclusive(dev, "parf"); if (IS_ERR(res->parf_reset)) return PTR_ERR(res->parf_reset); - res->phy_reset = devm_reset_control_get(dev, "phy"); + res->phy_reset = devm_reset_control_get_exclusive(dev, "phy"); if (IS_ERR(res->phy_reset)) return PTR_ERR(res->phy_reset); - res->axi_m_sticky_reset = devm_reset_control_get(dev, "axi_m_sticky"); + res->axi_m_sticky_reset = devm_reset_control_get_exclusive(dev, + "axi_m_sticky"); if (IS_ERR(res->axi_m_sticky_reset)) return PTR_ERR(res->axi_m_sticky_reset); - res->pipe_sticky_reset = devm_reset_control_get(dev, "pipe_sticky"); + res->pipe_sticky_reset = devm_reset_control_get_exclusive(dev, + "pipe_sticky"); if (IS_ERR(res->pipe_sticky_reset)) return PTR_ERR(res->pipe_sticky_reset); - res->pwr_reset = devm_reset_control_get(dev, "pwr"); + res->pwr_reset = devm_reset_control_get_exclusive(dev, "pwr"); if (IS_ERR(res->pwr_reset)) return PTR_ERR(res->pwr_reset); - res->ahb_reset = devm_reset_control_get(dev, "ahb"); + res->ahb_reset = devm_reset_control_get_exclusive(dev, "ahb"); if (IS_ERR(res->ahb_reset)) return PTR_ERR(res->ahb_reset); - res->phy_ahb_reset = devm_reset_control_get(dev, "phy_ahb"); + res->phy_ahb_reset = devm_reset_control_get_exclusive(dev, "phy_ahb"); if (IS_ERR(res->phy_ahb_reset)) return PTR_ERR(res->phy_ahb_reset); return 0; } -static void qcom_pcie_deinit_v3(struct qcom_pcie *pcie) +static void qcom_pcie_deinit_2_4_0(struct qcom_pcie *pcie) { - struct qcom_pcie_resources_v3 *res = &pcie->res.v3; + struct qcom_pcie_resources_2_4_0 *res = &pcie->res.v2_4_0; reset_control_assert(res->axi_m_reset); reset_control_assert(res->axi_s_reset); @@ -692,9 +739,9 @@ static void qcom_pcie_deinit_v3(struct qcom_pcie *pcie) clk_disable_unprepare(res->slave_clk); } -static int qcom_pcie_init_v3(struct qcom_pcie *pcie) +static int qcom_pcie_init_2_4_0(struct qcom_pcie *pcie) { - struct qcom_pcie_resources_v3 *res = &pcie->res.v3; + struct qcom_pcie_resources_2_4_0 *res = &pcie->res.v2_4_0; struct dw_pcie *pci = pcie->pci; struct device *dev = pci->dev; u32 val; @@ -884,6 +931,166 @@ err_rst_phy: return ret; } +static int qcom_pcie_get_resources_2_3_3(struct qcom_pcie *pcie) +{ + struct qcom_pcie_resources_2_3_3 *res = &pcie->res.v2_3_3; + struct dw_pcie *pci = pcie->pci; + struct device *dev = pci->dev; + int i; + const char *rst_names[] = { "axi_m", "axi_s", "pipe", + "axi_m_sticky", "sticky", + "ahb", "sleep", }; + + res->iface = devm_clk_get(dev, "iface"); + if (IS_ERR(res->iface)) + return PTR_ERR(res->iface); + + res->axi_m_clk = devm_clk_get(dev, "axi_m"); + if (IS_ERR(res->axi_m_clk)) + return PTR_ERR(res->axi_m_clk); + + res->axi_s_clk = devm_clk_get(dev, "axi_s"); + if (IS_ERR(res->axi_s_clk)) + return PTR_ERR(res->axi_s_clk); + + res->ahb_clk = devm_clk_get(dev, "ahb"); + if (IS_ERR(res->ahb_clk)) + return PTR_ERR(res->ahb_clk); + + res->aux_clk = devm_clk_get(dev, "aux"); + if (IS_ERR(res->aux_clk)) + return PTR_ERR(res->aux_clk); + + for (i = 0; i < ARRAY_SIZE(rst_names); i++) { + res->rst[i] = devm_reset_control_get(dev, rst_names[i]); + if (IS_ERR(res->rst[i])) + return PTR_ERR(res->rst[i]); + } + + return 0; +} + +static void qcom_pcie_deinit_2_3_3(struct qcom_pcie *pcie) +{ + struct qcom_pcie_resources_2_3_3 *res = &pcie->res.v2_3_3; + + clk_disable_unprepare(res->iface); + clk_disable_unprepare(res->axi_m_clk); + clk_disable_unprepare(res->axi_s_clk); + clk_disable_unprepare(res->ahb_clk); + clk_disable_unprepare(res->aux_clk); +} + +static int qcom_pcie_init_2_3_3(struct qcom_pcie *pcie) +{ + struct qcom_pcie_resources_2_3_3 *res = &pcie->res.v2_3_3; + struct dw_pcie *pci = pcie->pci; + struct device *dev = pci->dev; + int i, ret; + u32 val; + + for (i = 0; i < ARRAY_SIZE(res->rst); i++) { + ret = reset_control_assert(res->rst[i]); + if (ret) { + dev_err(dev, "reset #%d assert failed (%d)\n", i, ret); + return ret; + } + } + + usleep_range(2000, 2500); + + for (i = 0; i < ARRAY_SIZE(res->rst); i++) { + ret = reset_control_deassert(res->rst[i]); + if (ret) { + dev_err(dev, "reset #%d deassert failed (%d)\n", i, + ret); + return ret; + } + } + + /* + * Don't have a way to see if the reset has completed. + * Wait for some time. + */ + usleep_range(2000, 2500); + + ret = clk_prepare_enable(res->iface); + if (ret) { + dev_err(dev, "cannot prepare/enable core clock\n"); + goto err_clk_iface; + } + + ret = clk_prepare_enable(res->axi_m_clk); + if (ret) { + dev_err(dev, "cannot prepare/enable core clock\n"); + goto err_clk_axi_m; + } + + ret = clk_prepare_enable(res->axi_s_clk); + if (ret) { + dev_err(dev, "cannot prepare/enable axi slave clock\n"); + goto err_clk_axi_s; + } + + ret = clk_prepare_enable(res->ahb_clk); + if (ret) { + dev_err(dev, "cannot prepare/enable ahb clock\n"); + goto err_clk_ahb; + } + + ret = clk_prepare_enable(res->aux_clk); + if (ret) { + dev_err(dev, "cannot prepare/enable aux clock\n"); + goto err_clk_aux; + } + + writel(SLV_ADDR_SPACE_SZ, + pcie->parf + PCIE20_v3_PARF_SLV_ADDR_SPACE_SIZE); + + val = readl(pcie->parf + PCIE20_PARF_PHY_CTRL); + val &= ~BIT(0); + writel(val, pcie->parf + PCIE20_PARF_PHY_CTRL); + + writel(0, pcie->parf + PCIE20_PARF_DBI_BASE_ADDR); + + writel(MST_WAKEUP_EN | SLV_WAKEUP_EN | MSTR_ACLK_CGC_DIS + | SLV_ACLK_CGC_DIS | CORE_CLK_CGC_DIS | + AUX_PWR_DET | L23_CLK_RMV_DIS | L1_CLK_RMV_DIS, + pcie->parf + PCIE20_PARF_SYS_CTRL); + writel(0, pcie->parf + PCIE20_PARF_Q2A_FLUSH); + + writel(CMD_BME_VAL, pci->dbi_base + PCIE20_COMMAND_STATUS); + writel(DBI_RO_WR_EN, pci->dbi_base + PCIE20_MISC_CONTROL_1_REG); + writel(PCIE_CAP_LINK1_VAL, pci->dbi_base + PCIE20_CAP_LINK_1); + + val = readl(pci->dbi_base + PCIE20_CAP_LINK_CAPABILITIES); + val &= ~PCIE20_CAP_ACTIVE_STATE_LINK_PM_SUPPORT; + writel(val, pci->dbi_base + PCIE20_CAP_LINK_CAPABILITIES); + + writel(PCIE_CAP_CPL_TIMEOUT_DISABLE, pci->dbi_base + + PCIE20_DEVICE_CONTROL2_STATUS2); + + return 0; + +err_clk_aux: + clk_disable_unprepare(res->ahb_clk); +err_clk_ahb: + clk_disable_unprepare(res->axi_s_clk); +err_clk_axi_s: + clk_disable_unprepare(res->axi_m_clk); +err_clk_axi_m: + clk_disable_unprepare(res->iface); +err_clk_iface: + /* + * Not checking for failure, will anyway return + * the original failure in 'ret'. + */ + for (i = 0; i < ARRAY_SIZE(res->rst); i++) + reset_control_assert(res->rst[i]); + + return ret; +} + static int qcom_pcie_link_up(struct dw_pcie *pci) { u16 val = readw(pci->dbi_base + PCIE20_CAP + PCI_EXP_LNKSTA); @@ -891,7 +1098,7 @@ static int qcom_pcie_link_up(struct dw_pcie *pci) return !!(val & PCI_EXP_LNKSTA_DLLLA); } -static void qcom_pcie_host_init(struct pcie_port *pp) +static int qcom_pcie_host_init(struct pcie_port *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); struct qcom_pcie *pcie = to_qcom_pcie(pci); @@ -901,14 +1108,17 @@ static void qcom_pcie_host_init(struct pcie_port *pp) ret = pcie->ops->init(pcie); if (ret) - goto err_deinit; + return ret; ret = phy_power_on(pcie->phy); if (ret) goto err_deinit; - if (pcie->ops->post_init) - pcie->ops->post_init(pcie); + if (pcie->ops->post_init) { + ret = pcie->ops->post_init(pcie); + if (ret) + goto err_disable_phy; + } dw_pcie_setup_rc(pp); @@ -921,12 +1131,17 @@ static void qcom_pcie_host_init(struct pcie_port *pp) if (ret) goto err; - return; + return 0; err: qcom_ep_reset_assert(pcie); + if (pcie->ops->post_deinit) + pcie->ops->post_deinit(pcie); +err_disable_phy: phy_power_off(pcie->phy); err_deinit: pcie->ops->deinit(pcie); + + return ret; } static int qcom_pcie_rd_own_conf(struct pcie_port *pp, int where, int size, @@ -950,37 +1165,50 @@ static const struct dw_pcie_host_ops qcom_pcie_dw_ops = { .rd_own_conf = qcom_pcie_rd_own_conf, }; -static const struct qcom_pcie_ops ops_v0 = { - .get_resources = qcom_pcie_get_resources_v0, - .init = qcom_pcie_init_v0, - .deinit = qcom_pcie_deinit_v0, - .ltssm_enable = qcom_pcie_v0_v1_ltssm_enable, +/* Qcom IP rev.: 2.1.0 Synopsys IP rev.: 4.01a */ +static const struct qcom_pcie_ops ops_2_1_0 = { + .get_resources = qcom_pcie_get_resources_2_1_0, + .init = qcom_pcie_init_2_1_0, + .deinit = qcom_pcie_deinit_2_1_0, + .ltssm_enable = qcom_pcie_2_1_0_ltssm_enable, }; -static const struct qcom_pcie_ops ops_v1 = { - .get_resources = qcom_pcie_get_resources_v1, - .init = qcom_pcie_init_v1, - .deinit = qcom_pcie_deinit_v1, - .ltssm_enable = qcom_pcie_v0_v1_ltssm_enable, +/* Qcom IP rev.: 1.0.0 Synopsys IP rev.: 4.11a */ +static const struct qcom_pcie_ops ops_1_0_0 = { + .get_resources = qcom_pcie_get_resources_1_0_0, + .init = qcom_pcie_init_1_0_0, + .deinit = qcom_pcie_deinit_1_0_0, + .ltssm_enable = qcom_pcie_2_1_0_ltssm_enable, }; -static const struct qcom_pcie_ops ops_v2 = { - .get_resources = qcom_pcie_get_resources_v2, - .init = qcom_pcie_init_v2, - .post_init = qcom_pcie_post_init_v2, - .deinit = qcom_pcie_deinit_v2, - .ltssm_enable = qcom_pcie_v2_ltssm_enable, +/* Qcom IP rev.: 2.3.2 Synopsys IP rev.: 4.21a */ +static const struct qcom_pcie_ops ops_2_3_2 = { + .get_resources = qcom_pcie_get_resources_2_3_2, + .init = qcom_pcie_init_2_3_2, + .post_init = qcom_pcie_post_init_2_3_2, + .deinit = qcom_pcie_deinit_2_3_2, + .post_deinit = qcom_pcie_post_deinit_2_3_2, + .ltssm_enable = qcom_pcie_2_3_2_ltssm_enable, }; -static const struct dw_pcie_ops dw_pcie_ops = { - .link_up = qcom_pcie_link_up, +/* Qcom IP rev.: 2.4.0 Synopsys IP rev.: 4.20a */ +static const struct qcom_pcie_ops ops_2_4_0 = { + .get_resources = qcom_pcie_get_resources_2_4_0, + .init = qcom_pcie_init_2_4_0, + .deinit = qcom_pcie_deinit_2_4_0, + .ltssm_enable = qcom_pcie_2_3_2_ltssm_enable, }; -static const struct qcom_pcie_ops ops_v3 = { - .get_resources = qcom_pcie_get_resources_v3, - .init = qcom_pcie_init_v3, - .deinit = qcom_pcie_deinit_v3, - .ltssm_enable = qcom_pcie_v2_ltssm_enable, +/* Qcom IP rev.: 2.3.3 Synopsys IP rev.: 4.30a */ +static const struct qcom_pcie_ops ops_2_3_3 = { + .get_resources = qcom_pcie_get_resources_2_3_3, + .init = qcom_pcie_init_2_3_3, + .deinit = qcom_pcie_deinit_2_3_3, + .ltssm_enable = qcom_pcie_2_3_2_ltssm_enable, +}; + +static const struct dw_pcie_ops dw_pcie_ops = { + .link_up = qcom_pcie_link_up, }; static int qcom_pcie_probe(struct platform_device *pdev) @@ -1069,11 +1297,12 @@ static int qcom_pcie_probe(struct platform_device *pdev) } static const struct of_device_id qcom_pcie_match[] = { - { .compatible = "qcom,pcie-ipq8064", .data = &ops_v0 }, - { .compatible = "qcom,pcie-apq8064", .data = &ops_v0 }, - { .compatible = "qcom,pcie-apq8084", .data = &ops_v1 }, - { .compatible = "qcom,pcie-msm8996", .data = &ops_v2 }, - { .compatible = "qcom,pcie-ipq4019", .data = &ops_v3 }, + { .compatible = "qcom,pcie-apq8084", .data = &ops_1_0_0 }, + { .compatible = "qcom,pcie-ipq8064", .data = &ops_2_1_0 }, + { .compatible = "qcom,pcie-apq8064", .data = &ops_2_1_0 }, + { .compatible = "qcom,pcie-msm8996", .data = &ops_2_3_2 }, + { .compatible = "qcom,pcie-ipq8074", .data = &ops_2_3_3 }, + { .compatible = "qcom,pcie-ipq4019", .data = &ops_2_4_0 }, { } }; diff --git a/drivers/pci/dwc/pcie-spear13xx.c b/drivers/pci/dwc/pcie-spear13xx.c index 80897291e0fb..709189d23b31 100644 --- a/drivers/pci/dwc/pcie-spear13xx.c +++ b/drivers/pci/dwc/pcie-spear13xx.c @@ -177,13 +177,15 @@ static int spear13xx_pcie_link_up(struct dw_pcie *pci) return 0; } -static void spear13xx_pcie_host_init(struct pcie_port *pp) +static int spear13xx_pcie_host_init(struct pcie_port *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); struct spear13xx_pcie *spear13xx_pcie = to_spear13xx_pcie(pci); spear13xx_pcie_establish_link(spear13xx_pcie); spear13xx_pcie_enable_interrupts(spear13xx_pcie); + + return 0; } static const struct dw_pcie_host_ops spear13xx_pcie_host_ops = { @@ -199,9 +201,9 @@ static int spear13xx_add_pcie_port(struct spear13xx_pcie *spear13xx_pcie, int ret; pp->irq = platform_get_irq(pdev, 0); - if (!pp->irq) { + if (pp->irq < 0) { dev_err(dev, "failed to get irq\n"); - return -ENODEV; + return pp->irq; } ret = devm_request_irq(dev, pp->irq, spear13xx_pcie_irq_handler, IRQF_SHARED | IRQF_NO_THREAD, diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c index 53fff8030337..4ddc6e8f9fe7 100644 --- a/drivers/pci/endpoint/functions/pci-epf-test.c +++ b/drivers/pci/endpoint/functions/pci-epf-test.c @@ -54,6 +54,8 @@ static struct workqueue_struct *kpcitest_workqueue; struct pci_epf_test { void *reg[6]; struct pci_epf *epf; + enum pci_barno test_reg_bar; + bool linkup_notifier; struct delayed_work cmd_handler; }; @@ -74,7 +76,12 @@ static struct pci_epf_header test_header = { .interrupt_pin = PCI_INTERRUPT_INTA, }; -static int bar_size[] = { 512, 1024, 16384, 131072, 1048576 }; +struct pci_epf_test_data { + enum pci_barno test_reg_bar; + bool linkup_notifier; +}; + +static int bar_size[] = { 512, 512, 1024, 16384, 131072, 1048576 }; static int pci_epf_test_copy(struct pci_epf_test *epf_test) { @@ -86,7 +93,8 @@ static int pci_epf_test_copy(struct pci_epf_test *epf_test) struct pci_epf *epf = epf_test->epf; struct device *dev = &epf->dev; struct pci_epc *epc = epf->epc; - struct pci_epf_test_reg *reg = epf_test->reg[0]; + enum pci_barno test_reg_bar = epf_test->test_reg_bar; + struct pci_epf_test_reg *reg = epf_test->reg[test_reg_bar]; src_addr = pci_epc_mem_alloc_addr(epc, &src_phys_addr, reg->size); if (!src_addr) { @@ -145,7 +153,8 @@ static int pci_epf_test_read(struct pci_epf_test *epf_test) struct pci_epf *epf = epf_test->epf; struct device *dev = &epf->dev; struct pci_epc *epc = epf->epc; - struct pci_epf_test_reg *reg = epf_test->reg[0]; + enum pci_barno test_reg_bar = epf_test->test_reg_bar; + struct pci_epf_test_reg *reg = epf_test->reg[test_reg_bar]; src_addr = pci_epc_mem_alloc_addr(epc, &phys_addr, reg->size); if (!src_addr) { @@ -195,7 +204,8 @@ static int pci_epf_test_write(struct pci_epf_test *epf_test) struct pci_epf *epf = epf_test->epf; struct device *dev = &epf->dev; struct pci_epc *epc = epf->epc; - struct pci_epf_test_reg *reg = epf_test->reg[0]; + enum pci_barno test_reg_bar = epf_test->test_reg_bar; + struct pci_epf_test_reg *reg = epf_test->reg[test_reg_bar]; dst_addr = pci_epc_mem_alloc_addr(epc, &phys_addr, reg->size); if (!dst_addr) { @@ -247,7 +257,8 @@ static void pci_epf_test_raise_irq(struct pci_epf_test *epf_test) u8 msi_count; struct pci_epf *epf = epf_test->epf; struct pci_epc *epc = epf->epc; - struct pci_epf_test_reg *reg = epf_test->reg[0]; + enum pci_barno test_reg_bar = epf_test->test_reg_bar; + struct pci_epf_test_reg *reg = epf_test->reg[test_reg_bar]; reg->status |= STATUS_IRQ_RAISED; msi_count = pci_epc_get_msi(epc); @@ -263,22 +274,28 @@ static void pci_epf_test_cmd_handler(struct work_struct *work) int ret; u8 irq; u8 msi_count; + u32 command; struct pci_epf_test *epf_test = container_of(work, struct pci_epf_test, cmd_handler.work); struct pci_epf *epf = epf_test->epf; struct pci_epc *epc = epf->epc; - struct pci_epf_test_reg *reg = epf_test->reg[0]; + enum pci_barno test_reg_bar = epf_test->test_reg_bar; + struct pci_epf_test_reg *reg = epf_test->reg[test_reg_bar]; - if (!reg->command) + command = reg->command; + if (!command) goto reset_handler; - if (reg->command & COMMAND_RAISE_LEGACY_IRQ) { + reg->command = 0; + reg->status = 0; + + if (command & COMMAND_RAISE_LEGACY_IRQ) { reg->status = STATUS_IRQ_RAISED; pci_epc_raise_irq(epc, PCI_EPC_IRQ_LEGACY, 0); goto reset_handler; } - if (reg->command & COMMAND_WRITE) { + if (command & COMMAND_WRITE) { ret = pci_epf_test_write(epf_test); if (ret) reg->status |= STATUS_WRITE_FAIL; @@ -288,7 +305,7 @@ static void pci_epf_test_cmd_handler(struct work_struct *work) goto reset_handler; } - if (reg->command & COMMAND_READ) { + if (command & COMMAND_READ) { ret = pci_epf_test_read(epf_test); if (!ret) reg->status |= STATUS_READ_SUCCESS; @@ -298,7 +315,7 @@ static void pci_epf_test_cmd_handler(struct work_struct *work) goto reset_handler; } - if (reg->command & COMMAND_COPY) { + if (command & COMMAND_COPY) { ret = pci_epf_test_copy(epf_test); if (!ret) reg->status |= STATUS_COPY_SUCCESS; @@ -308,9 +325,9 @@ static void pci_epf_test_cmd_handler(struct work_struct *work) goto reset_handler; } - if (reg->command & COMMAND_RAISE_MSI_IRQ) { + if (command & COMMAND_RAISE_MSI_IRQ) { msi_count = pci_epc_get_msi(epc); - irq = (reg->command & MSI_NUMBER_MASK) >> MSI_NUMBER_SHIFT; + irq = (command & MSI_NUMBER_MASK) >> MSI_NUMBER_SHIFT; if (irq > msi_count || msi_count <= 0) goto reset_handler; reg->status = STATUS_IRQ_RAISED; @@ -319,8 +336,6 @@ static void pci_epf_test_cmd_handler(struct work_struct *work) } reset_handler: - reg->command = 0; - queue_delayed_work(kpcitest_workqueue, &epf_test->cmd_handler, msecs_to_jiffies(1)); } @@ -358,6 +373,7 @@ static int pci_epf_test_set_bar(struct pci_epf *epf) struct pci_epc *epc = epf->epc; struct device *dev = &epf->dev; struct pci_epf_test *epf_test = epf_get_drvdata(epf); + enum pci_barno test_reg_bar = epf_test->test_reg_bar; flags = PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_32; if (sizeof(dma_addr_t) == 0x8) @@ -370,7 +386,7 @@ static int pci_epf_test_set_bar(struct pci_epf *epf) if (ret) { pci_epf_free_space(epf, epf_test->reg[bar], bar); dev_err(dev, "failed to set BAR%d\n", bar); - if (bar == BAR_0) + if (bar == test_reg_bar) return ret; } } @@ -384,17 +400,20 @@ static int pci_epf_test_alloc_space(struct pci_epf *epf) struct device *dev = &epf->dev; void *base; int bar; + enum pci_barno test_reg_bar = epf_test->test_reg_bar; base = pci_epf_alloc_space(epf, sizeof(struct pci_epf_test_reg), - BAR_0); + test_reg_bar); if (!base) { dev_err(dev, "failed to allocated register space\n"); return -ENOMEM; } - epf_test->reg[0] = base; + epf_test->reg[test_reg_bar] = base; - for (bar = BAR_1; bar <= BAR_5; bar++) { - base = pci_epf_alloc_space(epf, bar_size[bar - 1], bar); + for (bar = BAR_0; bar <= BAR_5; bar++) { + if (bar == test_reg_bar) + continue; + base = pci_epf_alloc_space(epf, bar_size[bar], bar); if (!base) dev_err(dev, "failed to allocate space for BAR%d\n", bar); @@ -407,6 +426,7 @@ static int pci_epf_test_alloc_space(struct pci_epf *epf) static int pci_epf_test_bind(struct pci_epf *epf) { int ret; + struct pci_epf_test *epf_test = epf_get_drvdata(epf); struct pci_epf_header *header = epf->header; struct pci_epc *epc = epf->epc; struct device *dev = &epf->dev; @@ -432,13 +452,34 @@ static int pci_epf_test_bind(struct pci_epf *epf) if (ret) return ret; + if (!epf_test->linkup_notifier) + queue_work(kpcitest_workqueue, &epf_test->cmd_handler.work); + return 0; } +static const struct pci_epf_device_id pci_epf_test_ids[] = { + { + .name = "pci_epf_test", + }, + {}, +}; + static int pci_epf_test_probe(struct pci_epf *epf) { struct pci_epf_test *epf_test; struct device *dev = &epf->dev; + const struct pci_epf_device_id *match; + struct pci_epf_test_data *data; + enum pci_barno test_reg_bar = BAR_0; + bool linkup_notifier = true; + + match = pci_epf_match_device(pci_epf_test_ids, epf); + data = (struct pci_epf_test_data *)match->driver_data; + if (data) { + test_reg_bar = data->test_reg_bar; + linkup_notifier = data->linkup_notifier; + } epf_test = devm_kzalloc(dev, sizeof(*epf_test), GFP_KERNEL); if (!epf_test) @@ -446,6 +487,8 @@ static int pci_epf_test_probe(struct pci_epf *epf) epf->header = &test_header; epf_test->epf = epf; + epf_test->test_reg_bar = test_reg_bar; + epf_test->linkup_notifier = linkup_notifier; INIT_DELAYED_WORK(&epf_test->cmd_handler, pci_epf_test_cmd_handler); @@ -453,31 +496,15 @@ static int pci_epf_test_probe(struct pci_epf *epf) return 0; } -static int pci_epf_test_remove(struct pci_epf *epf) -{ - struct pci_epf_test *epf_test = epf_get_drvdata(epf); - - kfree(epf_test); - return 0; -} - static struct pci_epf_ops ops = { .unbind = pci_epf_test_unbind, .bind = pci_epf_test_bind, .linkup = pci_epf_test_linkup, }; -static const struct pci_epf_device_id pci_epf_test_ids[] = { - { - .name = "pci_epf_test", - }, - {}, -}; - static struct pci_epf_driver test_driver = { .driver.name = "pci_epf_test", .probe = pci_epf_test_probe, - .remove = pci_epf_test_remove, .id_table = pci_epf_test_ids, .ops = &ops, .owner = THIS_MODULE, diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c index caa7be10e473..42c2a1156325 100644 --- a/drivers/pci/endpoint/pci-epc-core.c +++ b/drivers/pci/endpoint/pci-epc-core.c @@ -21,6 +21,7 @@ #include <linux/dma-mapping.h> #include <linux/slab.h> #include <linux/module.h> +#include <linux/of_device.h> #include <linux/pci-epc.h> #include <linux/pci-epf.h> @@ -370,6 +371,7 @@ EXPORT_SYMBOL_GPL(pci_epc_write_header); int pci_epc_add_epf(struct pci_epc *epc, struct pci_epf *epf) { unsigned long flags; + struct device *dev = epc->dev.parent; if (epf->epc) return -EBUSY; @@ -381,8 +383,12 @@ int pci_epc_add_epf(struct pci_epc *epc, struct pci_epf *epf) return -EINVAL; epf->epc = epc; - dma_set_coherent_mask(&epf->dev, epc->dev.coherent_dma_mask); - epf->dev.dma_mask = epc->dev.dma_mask; + if (dev->of_node) { + of_dma_configure(&epf->dev, dev->of_node); + } else { + dma_set_coherent_mask(&epf->dev, epc->dev.coherent_dma_mask); + epf->dev.dma_mask = epc->dev.dma_mask; + } spin_lock_irqsave(&epc->lock, flags); list_add_tail(&epf->list, &epc->pci_epf); @@ -500,6 +506,7 @@ __pci_epc_create(struct device *dev, const struct pci_epc_ops *ops, dma_set_coherent_mask(&epc->dev, dev->coherent_dma_mask); epc->dev.class = pci_epc_class; epc->dev.dma_mask = dev->dma_mask; + epc->dev.parent = dev; epc->ops = ops; ret = dev_set_name(&epc->dev, "%s", dev_name(dev)); diff --git a/drivers/pci/endpoint/pci-epc-mem.c b/drivers/pci/endpoint/pci-epc-mem.c index 3a94cc1caf22..83b7d5d3fc3e 100644 --- a/drivers/pci/endpoint/pci-epc-mem.c +++ b/drivers/pci/endpoint/pci-epc-mem.c @@ -24,21 +24,54 @@ #include <linux/pci-epc.h> /** - * pci_epc_mem_init() - initialize the pci_epc_mem structure + * pci_epc_mem_get_order() - determine the allocation order of a memory size + * @mem: address space of the endpoint controller + * @size: the size for which to get the order + * + * Reimplement get_order() for mem->page_size since the generic get_order + * always gets order with a constant PAGE_SIZE. + */ +static int pci_epc_mem_get_order(struct pci_epc_mem *mem, size_t size) +{ + int order; + unsigned int page_shift = ilog2(mem->page_size); + + size--; + size >>= page_shift; +#if BITS_PER_LONG == 32 + order = fls(size); +#else + order = fls64(size); +#endif + return order; +} + +/** + * __pci_epc_mem_init() - initialize the pci_epc_mem structure * @epc: the EPC device that invoked pci_epc_mem_init * @phys_base: the physical address of the base * @size: the size of the address space + * @page_size: size of each page * * Invoke to initialize the pci_epc_mem structure used by the * endpoint functions to allocate mapped PCI address. */ -int pci_epc_mem_init(struct pci_epc *epc, phys_addr_t phys_base, size_t size) +int __pci_epc_mem_init(struct pci_epc *epc, phys_addr_t phys_base, size_t size, + size_t page_size) { int ret; struct pci_epc_mem *mem; unsigned long *bitmap; - int pages = size >> PAGE_SHIFT; - int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); + unsigned int page_shift; + int pages; + int bitmap_size; + + if (page_size < PAGE_SIZE) + page_size = PAGE_SIZE; + + page_shift = ilog2(page_size); + pages = size >> page_shift; + bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); mem = kzalloc(sizeof(*mem), GFP_KERNEL); if (!mem) { @@ -54,6 +87,7 @@ int pci_epc_mem_init(struct pci_epc *epc, phys_addr_t phys_base, size_t size) mem->bitmap = bitmap; mem->phys_base = phys_base; + mem->page_size = page_size; mem->pages = pages; mem->size = size; @@ -67,7 +101,7 @@ err_mem: err: return ret; } -EXPORT_SYMBOL_GPL(pci_epc_mem_init); +EXPORT_SYMBOL_GPL(__pci_epc_mem_init); /** * pci_epc_mem_exit() - cleanup the pci_epc_mem structure @@ -101,13 +135,17 @@ void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc, int pageno; void __iomem *virt_addr; struct pci_epc_mem *mem = epc->mem; - int order = get_order(size); + unsigned int page_shift = ilog2(mem->page_size); + int order; + + size = ALIGN(size, mem->page_size); + order = pci_epc_mem_get_order(mem, size); pageno = bitmap_find_free_region(mem->bitmap, mem->pages, order); if (pageno < 0) return NULL; - *phys_addr = mem->phys_base + (pageno << PAGE_SHIFT); + *phys_addr = mem->phys_base + (pageno << page_shift); virt_addr = ioremap(*phys_addr, size); if (!virt_addr) bitmap_release_region(mem->bitmap, pageno, order); @@ -129,11 +167,14 @@ void pci_epc_mem_free_addr(struct pci_epc *epc, phys_addr_t phys_addr, void __iomem *virt_addr, size_t size) { int pageno; - int order = get_order(size); struct pci_epc_mem *mem = epc->mem; + unsigned int page_shift = ilog2(mem->page_size); + int order; iounmap(virt_addr); - pageno = (phys_addr - mem->phys_base) >> PAGE_SHIFT; + pageno = (phys_addr - mem->phys_base) >> page_shift; + size = ALIGN(size, mem->page_size); + order = pci_epc_mem_get_order(mem, size); bitmap_release_region(mem->bitmap, pageno, order); } EXPORT_SYMBOL_GPL(pci_epc_mem_free_addr); diff --git a/drivers/pci/endpoint/pci-epf-core.c b/drivers/pci/endpoint/pci-epf-core.c index 6877d6a5bcc9..ae1611a62808 100644 --- a/drivers/pci/endpoint/pci-epf-core.c +++ b/drivers/pci/endpoint/pci-epf-core.c @@ -27,7 +27,7 @@ #include <linux/pci-ep-cfs.h> static struct bus_type pci_epf_bus_type; -static struct device_type pci_epf_type; +static const struct device_type pci_epf_type; /** * pci_epf_linkup() - Notify the function driver that EPC device has @@ -267,6 +267,22 @@ err_ret: } EXPORT_SYMBOL_GPL(pci_epf_create); +const struct pci_epf_device_id * +pci_epf_match_device(const struct pci_epf_device_id *id, struct pci_epf *epf) +{ + if (!id || !epf) + return NULL; + + while (*id->name) { + if (strcmp(epf->name, id->name) == 0) + return id; + id++; + } + + return NULL; +} +EXPORT_SYMBOL_GPL(pci_epf_match_device); + static void pci_epf_dev_release(struct device *dev) { struct pci_epf *epf = to_pci_epf(dev); @@ -275,7 +291,7 @@ static void pci_epf_dev_release(struct device *dev) kfree(epf); } -static struct device_type pci_epf_type = { +static const struct device_type pci_epf_type = { .release = pci_epf_dev_release, }; @@ -317,11 +333,12 @@ static int pci_epf_device_probe(struct device *dev) static int pci_epf_device_remove(struct device *dev) { - int ret; + int ret = 0; struct pci_epf *epf = to_pci_epf(dev); struct pci_epf_driver *driver = to_pci_epf_driver(dev->driver); - ret = driver->remove(epf); + if (driver->remove) + ret = driver->remove(epf); epf->driver = NULL; return ret; diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig index 89d61c2cbfaa..b868803792d8 100644 --- a/drivers/pci/host/Kconfig +++ b/drivers/pci/host/Kconfig @@ -71,7 +71,7 @@ config PCI_HOST_GENERIC config PCIE_XILINX bool "Xilinx AXI PCIe host bridge support" - depends on ARCH_ZYNQ || MICROBLAZE + depends on ARCH_ZYNQ || MICROBLAZE || (MIPS && PCI_DRIVERS_GENERIC) help Say 'Y' here if you want kernel to support the Xilinx AXI PCIe Host Bridge driver. @@ -182,14 +182,13 @@ config PCIE_ROCKCHIP config PCIE_MEDIATEK bool "MediaTek PCIe controller" - depends on ARM && (ARCH_MEDIATEK || COMPILE_TEST) + depends on (ARM || ARM64) && (ARCH_MEDIATEK || COMPILE_TEST) depends on OF depends on PCI select PCIEPORTBUS help Say Y here if you want to enable PCIe controller support on - MT7623 series SoCs. There is one single root complex with 3 root - ports available. Each port supports Gen2 lane x1. + MediaTek SoCs. config PCIE_TANGO_SMP8759 bool "Tango SMP8759 PCIe controller (DANGEROUS)" diff --git a/drivers/pci/host/pci-aardvark.c b/drivers/pci/host/pci-aardvark.c index 5fb9b620ac78..89f4e3d072d7 100644 --- a/drivers/pci/host/pci-aardvark.c +++ b/drivers/pci/host/pci-aardvark.c @@ -191,7 +191,6 @@ #define LINK_WAIT_USLEEP_MIN 90000 #define LINK_WAIT_USLEEP_MAX 100000 -#define LEGACY_IRQ_NUM 4 #define MSI_IRQ_NUM 32 struct advk_pcie { @@ -729,7 +728,7 @@ static int advk_pcie_init_irq_domain(struct advk_pcie *pcie) irq_chip->irq_unmask = advk_pcie_irq_unmask; pcie->irq_domain = - irq_domain_add_linear(pcie_intc_node, LEGACY_IRQ_NUM, + irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX, &advk_pcie_irq_domain_ops, pcie); if (!pcie->irq_domain) { dev_err(dev, "Failed to get a INTx IRQ domain\n"); @@ -786,7 +785,7 @@ static void advk_pcie_handle_int(struct advk_pcie *pcie) advk_pcie_handle_msi(pcie); /* Process legacy interrupts */ - for (i = 0; i < LEGACY_IRQ_NUM; i++) { + for (i = 0; i < PCI_NUM_INTX; i++) { if (!(status & PCIE_ISR0_INTX_ASSERT(i))) continue; diff --git a/drivers/pci/host/pci-ftpci100.c b/drivers/pci/host/pci-ftpci100.c index 5162dffc102b..96028f01bc90 100644 --- a/drivers/pci/host/pci-ftpci100.c +++ b/drivers/pci/host/pci-ftpci100.c @@ -350,12 +350,12 @@ static int faraday_pci_setup_cascaded_irq(struct faraday_pci *p) /* All PCI IRQs cascade off this one */ irq = of_irq_get(intc, 0); - if (!irq) { + if (irq <= 0) { dev_err(p->dev, "failed to get parent IRQ\n"); - return -EINVAL; + return irq ?: -EINVAL; } - p->irqdomain = irq_domain_add_linear(intc, 4, + p->irqdomain = irq_domain_add_linear(intc, PCI_NUM_INTX, &faraday_pci_irqdomain_ops, p); if (!p->irqdomain) { dev_err(p->dev, "failed to create Gemini PCI IRQ domain\n"); diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index aba041438566..0fe3ea164ee5 100644 --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -50,6 +50,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/pci.h> +#include <linux/delay.h> #include <linux/semaphore.h> #include <linux/irqdomain.h> #include <asm/irqdomain.h> @@ -1113,7 +1114,12 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) goto free_int_desc; } - wait_for_completion(&comp.comp_pkt.host_event); + /* + * Since this function is called with IRQ locks held, can't + * do normal wait for completion; instead poll. + */ + while (!try_wait_for_completion(&comp.comp_pkt.host_event)) + udelay(100); if (comp.comp_pkt.completion_status < 0) { dev_err(&hbus->hdev->device, diff --git a/drivers/pci/host/pci-mvebu.c b/drivers/pci/host/pci-mvebu.c index f353a6eb2f01..8d88f19dc171 100644 --- a/drivers/pci/host/pci-mvebu.c +++ b/drivers/pci/host/pci-mvebu.c @@ -1054,8 +1054,8 @@ static int mvebu_pcie_parse_port(struct mvebu_pcie *pcie, port->pcie = pcie; if (of_property_read_u32(child, "marvell,pcie-port", &port->port)) { - dev_warn(dev, "ignoring %s, missing pcie-port property\n", - of_node_full_name(child)); + dev_warn(dev, "ignoring %pOF, missing pcie-port property\n", + child); goto skip; } @@ -1106,8 +1106,8 @@ static int mvebu_pcie_parse_port(struct mvebu_pcie *pcie, } if (flags & OF_GPIO_ACTIVE_LOW) { - dev_info(dev, "%s: reset gpio is active low\n", - of_node_full_name(child)); + dev_info(dev, "%pOF: reset gpio is active low\n", + child); gpio_flags = GPIOF_ACTIVE_LOW | GPIOF_OUT_INIT_LOW; } else { @@ -1186,8 +1186,7 @@ static int mvebu_pcie_powerup(struct mvebu_pcie_port *port) */ static void mvebu_pcie_powerdown(struct mvebu_pcie_port *port) { - if (port->reset_gpio) - gpiod_set_value_cansleep(port->reset_gpio, 1); + gpiod_set_value_cansleep(port->reset_gpio, 1); clk_disable_unprepare(port->clk); } diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c index b3722b7709df..9c40da54f88a 100644 --- a/drivers/pci/host/pci-tegra.c +++ b/drivers/pci/host/pci-tegra.c @@ -1147,15 +1147,15 @@ static int tegra_pcie_resets_get(struct tegra_pcie *pcie) { struct device *dev = pcie->dev; - pcie->pex_rst = devm_reset_control_get(dev, "pex"); + pcie->pex_rst = devm_reset_control_get_exclusive(dev, "pex"); if (IS_ERR(pcie->pex_rst)) return PTR_ERR(pcie->pex_rst); - pcie->afi_rst = devm_reset_control_get(dev, "afi"); + pcie->afi_rst = devm_reset_control_get_exclusive(dev, "afi"); if (IS_ERR(pcie->afi_rst)) return PTR_ERR(pcie->afi_rst); - pcie->pcie_xrst = devm_reset_control_get(dev, "pcie_x"); + pcie->pcie_xrst = devm_reset_control_get_exclusive(dev, "pcie_x"); if (IS_ERR(pcie->pcie_xrst)) return PTR_ERR(pcie->pcie_xrst); @@ -1703,8 +1703,7 @@ static int tegra_pcie_get_legacy_regulators(struct tegra_pcie *pcie) pcie->num_supplies = 2; if (pcie->num_supplies == 0) { - dev_err(dev, "device %s not supported in legacy mode\n", - np->full_name); + dev_err(dev, "device %pOF not supported in legacy mode\n", np); return -ENODEV; } diff --git a/drivers/pci/host/pci-xgene-msi.c b/drivers/pci/host/pci-xgene-msi.c index f1b633bce525..1f42a202b021 100644 --- a/drivers/pci/host/pci-xgene-msi.c +++ b/drivers/pci/host/pci-xgene-msi.c @@ -489,7 +489,7 @@ static int xgene_msi_probe(struct platform_device *pdev) if (virt_msir < 0) { dev_err(&pdev->dev, "Cannot translate IRQ index %d\n", irq_index); - rc = -EINVAL; + rc = virt_msir; goto error; } xgene_msi->msi_groups[irq_index].gic_irq = virt_msir; diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c index bd897479a215..087645116ecb 100644 --- a/drivers/pci/host/pci-xgene.c +++ b/drivers/pci/host/pci-xgene.c @@ -61,7 +61,7 @@ #define SZ_1T (SZ_1G*1024ULL) #define PIPE_PHY_RATE_RD(src) ((0xc000 & (u32)(src)) >> 0xe) -#define ROOT_CAP_AND_CTRL 0x5C +#define XGENE_V1_PCI_EXP_CAP 0x40 /* PCIe IP version */ #define XGENE_PCIE_IP_VER_UNKN 0 @@ -160,7 +160,7 @@ static bool xgene_pcie_hide_rc_bars(struct pci_bus *bus, int offset) } static void __iomem *xgene_pcie_map_bus(struct pci_bus *bus, unsigned int devfn, - int offset) + int offset) { if ((pci_is_root_bus(bus) && devfn != 0) || xgene_pcie_hide_rc_bars(bus, offset)) @@ -189,7 +189,7 @@ static int xgene_pcie_config_read32(struct pci_bus *bus, unsigned int devfn, * Avoid this by not claiming to support CRS. */ if (pci_is_root_bus(bus) && (port->version == XGENE_PCIE_IP_VER_1) && - ((where & ~0x3) == ROOT_CAP_AND_CTRL)) + ((where & ~0x3) == XGENE_V1_PCI_EXP_CAP + PCI_EXP_RTCTL)) *val &= ~(PCI_EXP_RTCAP_CRSVIS << 16); if (size <= 2) @@ -265,12 +265,12 @@ static int xgene_v1_pcie_ecam_init(struct pci_config_window *cfg) } struct pci_ecam_ops xgene_v1_pcie_ecam_ops = { - .bus_shift = 16, - .init = xgene_v1_pcie_ecam_init, - .pci_ops = { - .map_bus = xgene_pcie_map_bus, - .read = xgene_pcie_config_read32, - .write = pci_generic_config_write, + .bus_shift = 16, + .init = xgene_v1_pcie_ecam_init, + .pci_ops = { + .map_bus = xgene_pcie_map_bus, + .read = xgene_pcie_config_read32, + .write = pci_generic_config_write, } }; @@ -280,12 +280,12 @@ static int xgene_v2_pcie_ecam_init(struct pci_config_window *cfg) } struct pci_ecam_ops xgene_v2_pcie_ecam_ops = { - .bus_shift = 16, - .init = xgene_v2_pcie_ecam_init, - .pci_ops = { - .map_bus = xgene_pcie_map_bus, - .read = xgene_pcie_config_read32, - .write = pci_generic_config_write, + .bus_shift = 16, + .init = xgene_v2_pcie_ecam_init, + .pci_ops = { + .map_bus = xgene_pcie_map_bus, + .read = xgene_pcie_config_read32, + .write = pci_generic_config_write, } }; #endif @@ -318,7 +318,7 @@ static u64 xgene_pcie_set_ib_mask(struct xgene_pcie_port *port, u32 addr, } static void xgene_pcie_linkup(struct xgene_pcie_port *port, - u32 *lanes, u32 *speed) + u32 *lanes, u32 *speed) { u32 val32; @@ -593,8 +593,7 @@ static void xgene_pcie_clear_config(struct xgene_pcie_port *port) xgene_pcie_writel(port, i, 0); } -static int xgene_pcie_setup(struct xgene_pcie_port *port, - struct list_head *res, +static int xgene_pcie_setup(struct xgene_pcie_port *port, struct list_head *res, resource_size_t io_base) { struct device *dev = port->dev; @@ -706,9 +705,9 @@ static const struct of_device_id xgene_pcie_match_table[] = { static struct platform_driver xgene_pcie_driver = { .driver = { - .name = "xgene-pcie", - .of_match_table = of_match_ptr(xgene_pcie_match_table), - .suppress_bind_attrs = true, + .name = "xgene-pcie", + .of_match_table = of_match_ptr(xgene_pcie_match_table), + .suppress_bind_attrs = true, }, .probe = xgene_pcie_probe_bridge, }; diff --git a/drivers/pci/host/pcie-altera-msi.c b/drivers/pci/host/pcie-altera-msi.c index 4e5d628e8cd4..d8141f4865de 100644 --- a/drivers/pci/host/pcie-altera-msi.c +++ b/drivers/pci/host/pcie-altera-msi.c @@ -64,13 +64,11 @@ static void altera_msi_isr(struct irq_desc *desc) struct irq_chip *chip = irq_desc_get_chip(desc); struct altera_msi *msi; unsigned long status; - u32 num_of_vectors; u32 bit; u32 virq; chained_irq_enter(chip, desc); msi = irq_desc_get_handler_data(desc); - num_of_vectors = msi->num_of_vectors; while ((status = msi_readl(msi, MSI_STATUS)) != 0) { for_each_set_bit(bit, &status, msi->num_of_vectors) { @@ -267,9 +265,9 @@ static int altera_msi_probe(struct platform_device *pdev) return ret; msi->irq = platform_get_irq(pdev, 0); - if (msi->irq <= 0) { + if (msi->irq < 0) { dev_err(&pdev->dev, "failed to map IRQ: %d\n", msi->irq); - ret = -ENODEV; + ret = msi->irq; goto err; } diff --git a/drivers/pci/host/pcie-altera.c b/drivers/pci/host/pcie-altera.c index 4ea4f8f5dc77..b468b8cccf8d 100644 --- a/drivers/pci/host/pcie-altera.c +++ b/drivers/pci/host/pcie-altera.c @@ -76,8 +76,6 @@ #define LINK_UP_TIMEOUT HZ #define LINK_RETRAIN_TIMEOUT HZ -#define INTX_NUM 4 - #define DWORD_MASK 3 struct altera_pcie { @@ -464,6 +462,7 @@ static int altera_pcie_intx_map(struct irq_domain *domain, unsigned int irq, static const struct irq_domain_ops intx_domain_ops = { .map = altera_pcie_intx_map, + .xlate = pci_irqd_intx_xlate, }; static void altera_pcie_isr(struct irq_desc *desc) @@ -481,11 +480,11 @@ static void altera_pcie_isr(struct irq_desc *desc) while ((status = cra_readl(pcie, P2A_INT_STATUS) & P2A_INT_STS_ALL) != 0) { - for_each_set_bit(bit, &status, INTX_NUM) { + for_each_set_bit(bit, &status, PCI_NUM_INTX) { /* clear interrupts */ cra_writel(pcie, 1 << bit, P2A_INT_STATUS); - virq = irq_find_mapping(pcie->irq_domain, bit + 1); + virq = irq_find_mapping(pcie->irq_domain, bit); if (virq) generic_handle_irq(virq); else @@ -536,7 +535,7 @@ static int altera_pcie_init_irq_domain(struct altera_pcie *pcie) struct device_node *node = dev->of_node; /* Setup INTx */ - pcie->irq_domain = irq_domain_add_linear(node, INTX_NUM + 1, + pcie->irq_domain = irq_domain_add_linear(node, PCI_NUM_INTX, &intx_domain_ops, pcie); if (!pcie->irq_domain) { dev_err(dev, "Failed to get a INTx IRQ domain\n"); @@ -559,9 +558,9 @@ static int altera_pcie_parse_dt(struct altera_pcie *pcie) /* setup IRQ */ pcie->irq = platform_get_irq(pdev, 0); - if (pcie->irq <= 0) { + if (pcie->irq < 0) { dev_err(dev, "failed to get IRQ: %d\n", pcie->irq); - return -EINVAL; + return pcie->irq; } irq_set_chained_handler_and_data(pcie->irq, altera_pcie_isr, pcie); diff --git a/drivers/pci/host/pcie-iproc-msi.c b/drivers/pci/host/pcie-iproc-msi.c index 9fad7915f82a..2d0f535a2f69 100644 --- a/drivers/pci/host/pcie-iproc-msi.c +++ b/drivers/pci/host/pcie-iproc-msi.c @@ -317,7 +317,6 @@ static void iproc_msi_handler(struct irq_desc *desc) struct irq_chip *chip = irq_desc_get_chip(desc); struct iproc_msi_grp *grp; struct iproc_msi *msi; - struct iproc_pcie *pcie; u32 eq, head, tail, nr_events; unsigned long hwirq; int virq; @@ -326,7 +325,6 @@ static void iproc_msi_handler(struct irq_desc *desc) grp = irq_desc_get_handler_data(desc); msi = grp->msi; - pcie = msi->pcie; eq = grp->eq; /* diff --git a/drivers/pci/host/pcie-iproc-platform.c b/drivers/pci/host/pcie-iproc-platform.c index 22531190bc40..a5073a921a04 100644 --- a/drivers/pci/host/pcie-iproc-platform.c +++ b/drivers/pci/host/pcie-iproc-platform.c @@ -134,6 +134,13 @@ static int iproc_pcie_pltfm_remove(struct platform_device *pdev) return iproc_pcie_remove(pcie); } +static void iproc_pcie_pltfm_shutdown(struct platform_device *pdev) +{ + struct iproc_pcie *pcie = platform_get_drvdata(pdev); + + iproc_pcie_shutdown(pcie); +} + static struct platform_driver iproc_pcie_pltfm_driver = { .driver = { .name = "iproc-pcie", @@ -141,6 +148,7 @@ static struct platform_driver iproc_pcie_pltfm_driver = { }, .probe = iproc_pcie_pltfm_probe, .remove = iproc_pcie_pltfm_remove, + .shutdown = iproc_pcie_pltfm_shutdown, }; module_platform_driver(iproc_pcie_pltfm_driver); diff --git a/drivers/pci/host/pcie-iproc.c b/drivers/pci/host/pcie-iproc.c index c57486348856..3a8b9d20ee57 100644 --- a/drivers/pci/host/pcie-iproc.c +++ b/drivers/pci/host/pcie-iproc.c @@ -31,68 +31,71 @@ #include "pcie-iproc.h" -#define EP_PERST_SOURCE_SELECT_SHIFT 2 -#define EP_PERST_SOURCE_SELECT BIT(EP_PERST_SOURCE_SELECT_SHIFT) -#define EP_MODE_SURVIVE_PERST_SHIFT 1 -#define EP_MODE_SURVIVE_PERST BIT(EP_MODE_SURVIVE_PERST_SHIFT) -#define RC_PCIE_RST_OUTPUT_SHIFT 0 -#define RC_PCIE_RST_OUTPUT BIT(RC_PCIE_RST_OUTPUT_SHIFT) -#define PAXC_RESET_MASK 0x7f - -#define GIC_V3_CFG_SHIFT 0 -#define GIC_V3_CFG BIT(GIC_V3_CFG_SHIFT) - -#define MSI_ENABLE_CFG_SHIFT 0 -#define MSI_ENABLE_CFG BIT(MSI_ENABLE_CFG_SHIFT) - -#define CFG_IND_ADDR_MASK 0x00001ffc - -#define CFG_ADDR_BUS_NUM_SHIFT 20 -#define CFG_ADDR_BUS_NUM_MASK 0x0ff00000 -#define CFG_ADDR_DEV_NUM_SHIFT 15 -#define CFG_ADDR_DEV_NUM_MASK 0x000f8000 -#define CFG_ADDR_FUNC_NUM_SHIFT 12 -#define CFG_ADDR_FUNC_NUM_MASK 0x00007000 -#define CFG_ADDR_REG_NUM_SHIFT 2 -#define CFG_ADDR_REG_NUM_MASK 0x00000ffc -#define CFG_ADDR_CFG_TYPE_SHIFT 0 -#define CFG_ADDR_CFG_TYPE_MASK 0x00000003 - -#define SYS_RC_INTX_MASK 0xf - -#define PCIE_PHYLINKUP_SHIFT 3 -#define PCIE_PHYLINKUP BIT(PCIE_PHYLINKUP_SHIFT) -#define PCIE_DL_ACTIVE_SHIFT 2 -#define PCIE_DL_ACTIVE BIT(PCIE_DL_ACTIVE_SHIFT) - -#define APB_ERR_EN_SHIFT 0 -#define APB_ERR_EN BIT(APB_ERR_EN_SHIFT) +#define EP_PERST_SOURCE_SELECT_SHIFT 2 +#define EP_PERST_SOURCE_SELECT BIT(EP_PERST_SOURCE_SELECT_SHIFT) +#define EP_MODE_SURVIVE_PERST_SHIFT 1 +#define EP_MODE_SURVIVE_PERST BIT(EP_MODE_SURVIVE_PERST_SHIFT) +#define RC_PCIE_RST_OUTPUT_SHIFT 0 +#define RC_PCIE_RST_OUTPUT BIT(RC_PCIE_RST_OUTPUT_SHIFT) +#define PAXC_RESET_MASK 0x7f + +#define GIC_V3_CFG_SHIFT 0 +#define GIC_V3_CFG BIT(GIC_V3_CFG_SHIFT) + +#define MSI_ENABLE_CFG_SHIFT 0 +#define MSI_ENABLE_CFG BIT(MSI_ENABLE_CFG_SHIFT) + +#define CFG_IND_ADDR_MASK 0x00001ffc + +#define CFG_ADDR_BUS_NUM_SHIFT 20 +#define CFG_ADDR_BUS_NUM_MASK 0x0ff00000 +#define CFG_ADDR_DEV_NUM_SHIFT 15 +#define CFG_ADDR_DEV_NUM_MASK 0x000f8000 +#define CFG_ADDR_FUNC_NUM_SHIFT 12 +#define CFG_ADDR_FUNC_NUM_MASK 0x00007000 +#define CFG_ADDR_REG_NUM_SHIFT 2 +#define CFG_ADDR_REG_NUM_MASK 0x00000ffc +#define CFG_ADDR_CFG_TYPE_SHIFT 0 +#define CFG_ADDR_CFG_TYPE_MASK 0x00000003 + +#define SYS_RC_INTX_MASK 0xf + +#define PCIE_PHYLINKUP_SHIFT 3 +#define PCIE_PHYLINKUP BIT(PCIE_PHYLINKUP_SHIFT) +#define PCIE_DL_ACTIVE_SHIFT 2 +#define PCIE_DL_ACTIVE BIT(PCIE_DL_ACTIVE_SHIFT) + +#define APB_ERR_EN_SHIFT 0 +#define APB_ERR_EN BIT(APB_ERR_EN_SHIFT) + +#define CFG_RETRY_STATUS 0xffff0001 +#define CFG_RETRY_STATUS_TIMEOUT_US 500000 /* 500 milliseconds */ /* derive the enum index of the outbound/inbound mapping registers */ -#define MAP_REG(base_reg, index) ((base_reg) + (index) * 2) +#define MAP_REG(base_reg, index) ((base_reg) + (index) * 2) /* * Maximum number of outbound mapping window sizes that can be supported by any * OARR/OMAP mapping pair */ -#define MAX_NUM_OB_WINDOW_SIZES 4 +#define MAX_NUM_OB_WINDOW_SIZES 4 -#define OARR_VALID_SHIFT 0 -#define OARR_VALID BIT(OARR_VALID_SHIFT) -#define OARR_SIZE_CFG_SHIFT 1 +#define OARR_VALID_SHIFT 0 +#define OARR_VALID BIT(OARR_VALID_SHIFT) +#define OARR_SIZE_CFG_SHIFT 1 /* * Maximum number of inbound mapping region sizes that can be supported by an * IARR */ -#define MAX_NUM_IB_REGION_SIZES 9 +#define MAX_NUM_IB_REGION_SIZES 9 -#define IMAP_VALID_SHIFT 0 -#define IMAP_VALID BIT(IMAP_VALID_SHIFT) +#define IMAP_VALID_SHIFT 0 +#define IMAP_VALID BIT(IMAP_VALID_SHIFT) -#define PCI_EXP_CAP 0xac +#define IPROC_PCI_EXP_CAP 0xac -#define IPROC_PCIE_REG_INVALID 0xffff +#define IPROC_PCIE_REG_INVALID 0xffff /** * iProc PCIe outbound mapping controller specific parameters @@ -304,80 +307,80 @@ enum iproc_pcie_reg { /* iProc PCIe PAXB BCMA registers */ static const u16 iproc_pcie_reg_paxb_bcma[] = { - [IPROC_PCIE_CLK_CTRL] = 0x000, - [IPROC_PCIE_CFG_IND_ADDR] = 0x120, - [IPROC_PCIE_CFG_IND_DATA] = 0x124, - [IPROC_PCIE_CFG_ADDR] = 0x1f8, - [IPROC_PCIE_CFG_DATA] = 0x1fc, - [IPROC_PCIE_INTX_EN] = 0x330, - [IPROC_PCIE_LINK_STATUS] = 0xf0c, + [IPROC_PCIE_CLK_CTRL] = 0x000, + [IPROC_PCIE_CFG_IND_ADDR] = 0x120, + [IPROC_PCIE_CFG_IND_DATA] = 0x124, + [IPROC_PCIE_CFG_ADDR] = 0x1f8, + [IPROC_PCIE_CFG_DATA] = 0x1fc, + [IPROC_PCIE_INTX_EN] = 0x330, + [IPROC_PCIE_LINK_STATUS] = 0xf0c, }; /* iProc PCIe PAXB registers */ static const u16 iproc_pcie_reg_paxb[] = { - [IPROC_PCIE_CLK_CTRL] = 0x000, - [IPROC_PCIE_CFG_IND_ADDR] = 0x120, - [IPROC_PCIE_CFG_IND_DATA] = 0x124, - [IPROC_PCIE_CFG_ADDR] = 0x1f8, - [IPROC_PCIE_CFG_DATA] = 0x1fc, - [IPROC_PCIE_INTX_EN] = 0x330, - [IPROC_PCIE_OARR0] = 0xd20, - [IPROC_PCIE_OMAP0] = 0xd40, - [IPROC_PCIE_OARR1] = 0xd28, - [IPROC_PCIE_OMAP1] = 0xd48, - [IPROC_PCIE_LINK_STATUS] = 0xf0c, - [IPROC_PCIE_APB_ERR_EN] = 0xf40, + [IPROC_PCIE_CLK_CTRL] = 0x000, + [IPROC_PCIE_CFG_IND_ADDR] = 0x120, + [IPROC_PCIE_CFG_IND_DATA] = 0x124, + [IPROC_PCIE_CFG_ADDR] = 0x1f8, + [IPROC_PCIE_CFG_DATA] = 0x1fc, + [IPROC_PCIE_INTX_EN] = 0x330, + [IPROC_PCIE_OARR0] = 0xd20, + [IPROC_PCIE_OMAP0] = 0xd40, + [IPROC_PCIE_OARR1] = 0xd28, + [IPROC_PCIE_OMAP1] = 0xd48, + [IPROC_PCIE_LINK_STATUS] = 0xf0c, + [IPROC_PCIE_APB_ERR_EN] = 0xf40, }; /* iProc PCIe PAXB v2 registers */ static const u16 iproc_pcie_reg_paxb_v2[] = { - [IPROC_PCIE_CLK_CTRL] = 0x000, - [IPROC_PCIE_CFG_IND_ADDR] = 0x120, - [IPROC_PCIE_CFG_IND_DATA] = 0x124, - [IPROC_PCIE_CFG_ADDR] = 0x1f8, - [IPROC_PCIE_CFG_DATA] = 0x1fc, - [IPROC_PCIE_INTX_EN] = 0x330, - [IPROC_PCIE_OARR0] = 0xd20, - [IPROC_PCIE_OMAP0] = 0xd40, - [IPROC_PCIE_OARR1] = 0xd28, - [IPROC_PCIE_OMAP1] = 0xd48, - [IPROC_PCIE_OARR2] = 0xd60, - [IPROC_PCIE_OMAP2] = 0xd68, - [IPROC_PCIE_OARR3] = 0xdf0, - [IPROC_PCIE_OMAP3] = 0xdf8, - [IPROC_PCIE_IARR0] = 0xd00, - [IPROC_PCIE_IMAP0] = 0xc00, - [IPROC_PCIE_IARR2] = 0xd10, - [IPROC_PCIE_IMAP2] = 0xcc0, - [IPROC_PCIE_IARR3] = 0xe00, - [IPROC_PCIE_IMAP3] = 0xe08, - [IPROC_PCIE_IARR4] = 0xe68, - [IPROC_PCIE_IMAP4] = 0xe70, - [IPROC_PCIE_LINK_STATUS] = 0xf0c, - [IPROC_PCIE_APB_ERR_EN] = 0xf40, + [IPROC_PCIE_CLK_CTRL] = 0x000, + [IPROC_PCIE_CFG_IND_ADDR] = 0x120, + [IPROC_PCIE_CFG_IND_DATA] = 0x124, + [IPROC_PCIE_CFG_ADDR] = 0x1f8, + [IPROC_PCIE_CFG_DATA] = 0x1fc, + [IPROC_PCIE_INTX_EN] = 0x330, + [IPROC_PCIE_OARR0] = 0xd20, + [IPROC_PCIE_OMAP0] = 0xd40, + [IPROC_PCIE_OARR1] = 0xd28, + [IPROC_PCIE_OMAP1] = 0xd48, + [IPROC_PCIE_OARR2] = 0xd60, + [IPROC_PCIE_OMAP2] = 0xd68, + [IPROC_PCIE_OARR3] = 0xdf0, + [IPROC_PCIE_OMAP3] = 0xdf8, + [IPROC_PCIE_IARR0] = 0xd00, + [IPROC_PCIE_IMAP0] = 0xc00, + [IPROC_PCIE_IARR2] = 0xd10, + [IPROC_PCIE_IMAP2] = 0xcc0, + [IPROC_PCIE_IARR3] = 0xe00, + [IPROC_PCIE_IMAP3] = 0xe08, + [IPROC_PCIE_IARR4] = 0xe68, + [IPROC_PCIE_IMAP4] = 0xe70, + [IPROC_PCIE_LINK_STATUS] = 0xf0c, + [IPROC_PCIE_APB_ERR_EN] = 0xf40, }; /* iProc PCIe PAXC v1 registers */ static const u16 iproc_pcie_reg_paxc[] = { - [IPROC_PCIE_CLK_CTRL] = 0x000, - [IPROC_PCIE_CFG_IND_ADDR] = 0x1f0, - [IPROC_PCIE_CFG_IND_DATA] = 0x1f4, - [IPROC_PCIE_CFG_ADDR] = 0x1f8, - [IPROC_PCIE_CFG_DATA] = 0x1fc, + [IPROC_PCIE_CLK_CTRL] = 0x000, + [IPROC_PCIE_CFG_IND_ADDR] = 0x1f0, + [IPROC_PCIE_CFG_IND_DATA] = 0x1f4, + [IPROC_PCIE_CFG_ADDR] = 0x1f8, + [IPROC_PCIE_CFG_DATA] = 0x1fc, }; /* iProc PCIe PAXC v2 registers */ static const u16 iproc_pcie_reg_paxc_v2[] = { - [IPROC_PCIE_MSI_GIC_MODE] = 0x050, - [IPROC_PCIE_MSI_BASE_ADDR] = 0x074, - [IPROC_PCIE_MSI_WINDOW_SIZE] = 0x078, - [IPROC_PCIE_MSI_ADDR_LO] = 0x07c, - [IPROC_PCIE_MSI_ADDR_HI] = 0x080, - [IPROC_PCIE_MSI_EN_CFG] = 0x09c, - [IPROC_PCIE_CFG_IND_ADDR] = 0x1f0, - [IPROC_PCIE_CFG_IND_DATA] = 0x1f4, - [IPROC_PCIE_CFG_ADDR] = 0x1f8, - [IPROC_PCIE_CFG_DATA] = 0x1fc, + [IPROC_PCIE_MSI_GIC_MODE] = 0x050, + [IPROC_PCIE_MSI_BASE_ADDR] = 0x074, + [IPROC_PCIE_MSI_WINDOW_SIZE] = 0x078, + [IPROC_PCIE_MSI_ADDR_LO] = 0x07c, + [IPROC_PCIE_MSI_ADDR_HI] = 0x080, + [IPROC_PCIE_MSI_EN_CFG] = 0x09c, + [IPROC_PCIE_CFG_IND_ADDR] = 0x1f0, + [IPROC_PCIE_CFG_IND_DATA] = 0x1f4, + [IPROC_PCIE_CFG_ADDR] = 0x1f8, + [IPROC_PCIE_CFG_DATA] = 0x1fc, }; static inline struct iproc_pcie *iproc_data(struct pci_bus *bus) @@ -448,18 +451,112 @@ static inline void iproc_pcie_apb_err_disable(struct pci_bus *bus, } } +static void __iomem *iproc_pcie_map_ep_cfg_reg(struct iproc_pcie *pcie, + unsigned int busno, + unsigned int slot, + unsigned int fn, + int where) +{ + u16 offset; + u32 val; + + /* EP device access */ + val = (busno << CFG_ADDR_BUS_NUM_SHIFT) | + (slot << CFG_ADDR_DEV_NUM_SHIFT) | + (fn << CFG_ADDR_FUNC_NUM_SHIFT) | + (where & CFG_ADDR_REG_NUM_MASK) | + (1 & CFG_ADDR_CFG_TYPE_MASK); + + iproc_pcie_write_reg(pcie, IPROC_PCIE_CFG_ADDR, val); + offset = iproc_pcie_reg_offset(pcie, IPROC_PCIE_CFG_DATA); + + if (iproc_pcie_reg_is_invalid(offset)) + return NULL; + + return (pcie->base + offset); +} + +static unsigned int iproc_pcie_cfg_retry(void __iomem *cfg_data_p) +{ + int timeout = CFG_RETRY_STATUS_TIMEOUT_US; + unsigned int data; + + /* + * As per PCIe spec r3.1, sec 2.3.2, CRS Software Visibility only + * affects config reads of the Vendor ID. For config writes or any + * other config reads, the Root may automatically reissue the + * configuration request again as a new request. + * + * For config reads, this hardware returns CFG_RETRY_STATUS data + * when it receives a CRS completion, regardless of the address of + * the read or the CRS Software Visibility Enable bit. As a + * partial workaround for this, we retry in software any read that + * returns CFG_RETRY_STATUS. + * + * Note that a non-Vendor ID config register may have a value of + * CFG_RETRY_STATUS. If we read that, we can't distinguish it from + * a CRS completion, so we will incorrectly retry the read and + * eventually return the wrong data (0xffffffff). + */ + data = readl(cfg_data_p); + while (data == CFG_RETRY_STATUS && timeout--) { + udelay(1); + data = readl(cfg_data_p); + } + + if (data == CFG_RETRY_STATUS) + data = 0xffffffff; + + return data; +} + +static int iproc_pcie_config_read(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *val) +{ + struct iproc_pcie *pcie = iproc_data(bus); + unsigned int slot = PCI_SLOT(devfn); + unsigned int fn = PCI_FUNC(devfn); + unsigned int busno = bus->number; + void __iomem *cfg_data_p; + unsigned int data; + int ret; + + /* root complex access */ + if (busno == 0) { + ret = pci_generic_config_read32(bus, devfn, where, size, val); + if (ret != PCIBIOS_SUCCESSFUL) + return ret; + + /* Don't advertise CRS SV support */ + if ((where & ~0x3) == IPROC_PCI_EXP_CAP + PCI_EXP_RTCTL) + *val &= ~(PCI_EXP_RTCAP_CRSVIS << 16); + return PCIBIOS_SUCCESSFUL; + } + + cfg_data_p = iproc_pcie_map_ep_cfg_reg(pcie, busno, slot, fn, where); + + if (!cfg_data_p) + return PCIBIOS_DEVICE_NOT_FOUND; + + data = iproc_pcie_cfg_retry(cfg_data_p); + + *val = data; + if (size <= 2) + *val = (data >> (8 * (where & 3))) & ((1 << (size * 8)) - 1); + + return PCIBIOS_SUCCESSFUL; +} + /** * Note access to the configuration registers are protected at the higher layer * by 'pci_lock' in drivers/pci/access.c */ static void __iomem *iproc_pcie_map_cfg_bus(struct iproc_pcie *pcie, - int busno, - unsigned int devfn, + int busno, unsigned int devfn, int where) { unsigned slot = PCI_SLOT(devfn); unsigned fn = PCI_FUNC(devfn); - u32 val; u16 offset; /* root complex access */ @@ -484,18 +581,7 @@ static void __iomem *iproc_pcie_map_cfg_bus(struct iproc_pcie *pcie, if (slot > 0) return NULL; - /* EP device access */ - val = (busno << CFG_ADDR_BUS_NUM_SHIFT) | - (slot << CFG_ADDR_DEV_NUM_SHIFT) | - (fn << CFG_ADDR_FUNC_NUM_SHIFT) | - (where & CFG_ADDR_REG_NUM_MASK) | - (1 & CFG_ADDR_CFG_TYPE_MASK); - iproc_pcie_write_reg(pcie, IPROC_PCIE_CFG_ADDR, val); - offset = iproc_pcie_reg_offset(pcie, IPROC_PCIE_CFG_DATA); - if (iproc_pcie_reg_is_invalid(offset)) - return NULL; - else - return (pcie->base + offset); + return iproc_pcie_map_ep_cfg_reg(pcie, busno, slot, fn, where); } static void __iomem *iproc_pcie_bus_map_cfg_bus(struct pci_bus *bus, @@ -554,9 +640,13 @@ static int iproc_pcie_config_read32(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val) { int ret; + struct iproc_pcie *pcie = iproc_data(bus); iproc_pcie_apb_err_disable(bus, true); - ret = pci_generic_config_read32(bus, devfn, where, size, val); + if (pcie->type == IPROC_PCIE_PAXB_V2) + ret = iproc_pcie_config_read(bus, devfn, where, size, val); + else + ret = pci_generic_config_read32(bus, devfn, where, size, val); iproc_pcie_apb_err_disable(bus, false); return ret; @@ -580,7 +670,7 @@ static struct pci_ops iproc_pcie_ops = { .write = iproc_pcie_config_write32, }; -static void iproc_pcie_reset(struct iproc_pcie *pcie) +static void iproc_pcie_perst_ctrl(struct iproc_pcie *pcie, bool assert) { u32 val; @@ -592,26 +682,33 @@ static void iproc_pcie_reset(struct iproc_pcie *pcie) if (pcie->ep_is_internal) return; - /* - * Select perst_b signal as reset source. Put the device into reset, - * and then bring it out of reset - */ - val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL); - val &= ~EP_PERST_SOURCE_SELECT & ~EP_MODE_SURVIVE_PERST & - ~RC_PCIE_RST_OUTPUT; - iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val); - udelay(250); - - val |= RC_PCIE_RST_OUTPUT; - iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val); - msleep(100); + if (assert) { + val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL); + val &= ~EP_PERST_SOURCE_SELECT & ~EP_MODE_SURVIVE_PERST & + ~RC_PCIE_RST_OUTPUT; + iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val); + udelay(250); + } else { + val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL); + val |= RC_PCIE_RST_OUTPUT; + iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val); + msleep(100); + } +} + +int iproc_pcie_shutdown(struct iproc_pcie *pcie) +{ + iproc_pcie_perst_ctrl(pcie, true); + msleep(500); + + return 0; } +EXPORT_SYMBOL_GPL(iproc_pcie_shutdown); static int iproc_pcie_check_link(struct iproc_pcie *pcie) { struct device *dev = pcie->dev; u32 hdr_type, link_ctrl, link_status, class, val; - u16 pos = PCI_EXP_CAP; bool link_is_active = false; /* @@ -628,16 +725,16 @@ static int iproc_pcie_check_link(struct iproc_pcie *pcie) } /* make sure we are not in EP mode */ - iproc_pci_raw_config_read32(pcie, 0, PCI_HEADER_TYPE, 1, &hdr_type); + iproc_pci_raw_config_read32(pcie, 0, PCI_HEADER_TYPE, 1, &hdr_type); if ((hdr_type & 0x7f) != PCI_HEADER_TYPE_BRIDGE) { dev_err(dev, "in EP mode, hdr=%#02x\n", hdr_type); return -EFAULT; } /* force class to PCI_CLASS_BRIDGE_PCI (0x0604) */ -#define PCI_BRIDGE_CTRL_REG_OFFSET 0x43c -#define PCI_CLASS_BRIDGE_MASK 0xffff00 -#define PCI_CLASS_BRIDGE_SHIFT 8 +#define PCI_BRIDGE_CTRL_REG_OFFSET 0x43c +#define PCI_CLASS_BRIDGE_MASK 0xffff00 +#define PCI_CLASS_BRIDGE_SHIFT 8 iproc_pci_raw_config_read32(pcie, 0, PCI_BRIDGE_CTRL_REG_OFFSET, 4, &class); class &= ~PCI_CLASS_BRIDGE_MASK; @@ -646,31 +743,31 @@ static int iproc_pcie_check_link(struct iproc_pcie *pcie) 4, class); /* check link status to see if link is active */ - iproc_pci_raw_config_read32(pcie, 0, pos + PCI_EXP_LNKSTA, + iproc_pci_raw_config_read32(pcie, 0, IPROC_PCI_EXP_CAP + PCI_EXP_LNKSTA, 2, &link_status); if (link_status & PCI_EXP_LNKSTA_NLW) link_is_active = true; if (!link_is_active) { /* try GEN 1 link speed */ -#define PCI_TARGET_LINK_SPEED_MASK 0xf -#define PCI_TARGET_LINK_SPEED_GEN2 0x2 -#define PCI_TARGET_LINK_SPEED_GEN1 0x1 +#define PCI_TARGET_LINK_SPEED_MASK 0xf +#define PCI_TARGET_LINK_SPEED_GEN2 0x2 +#define PCI_TARGET_LINK_SPEED_GEN1 0x1 iproc_pci_raw_config_read32(pcie, 0, - pos + PCI_EXP_LNKCTL2, 4, - &link_ctrl); + IPROC_PCI_EXP_CAP + PCI_EXP_LNKCTL2, + 4, &link_ctrl); if ((link_ctrl & PCI_TARGET_LINK_SPEED_MASK) == PCI_TARGET_LINK_SPEED_GEN2) { link_ctrl &= ~PCI_TARGET_LINK_SPEED_MASK; link_ctrl |= PCI_TARGET_LINK_SPEED_GEN1; iproc_pci_raw_config_write32(pcie, 0, - pos + PCI_EXP_LNKCTL2, - 4, link_ctrl); + IPROC_PCI_EXP_CAP + PCI_EXP_LNKCTL2, + 4, link_ctrl); msleep(100); iproc_pci_raw_config_read32(pcie, 0, - pos + PCI_EXP_LNKSTA, - 2, &link_status); + IPROC_PCI_EXP_CAP + PCI_EXP_LNKSTA, + 2, &link_status); if (link_status & PCI_EXP_LNKSTA_NLW) link_is_active = true; } @@ -1223,6 +1320,8 @@ static int iproc_pcie_rev_init(struct iproc_pcie *pcie) pcie->ib.nr_regions = ARRAY_SIZE(paxb_v2_ib_map); pcie->ib_map = paxb_v2_ib_map; pcie->need_msi_steer = true; + dev_warn(dev, "reads of config registers that contain %#x return incorrect data\n", + CFG_RETRY_STATUS); break; case IPROC_PCIE_PAXC: regs = iproc_pcie_reg_paxc; @@ -1286,7 +1385,8 @@ int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res) goto err_exit_phy; } - iproc_pcie_reset(pcie); + iproc_pcie_perst_ctrl(pcie, true); + iproc_pcie_perst_ctrl(pcie, false); if (pcie->need_ob_cfg) { ret = iproc_pcie_map_ranges(pcie, res); diff --git a/drivers/pci/host/pcie-iproc.h b/drivers/pci/host/pcie-iproc.h index 0bbe2ea44f3e..a6b55cec9a66 100644 --- a/drivers/pci/host/pcie-iproc.h +++ b/drivers/pci/host/pcie-iproc.h @@ -110,6 +110,7 @@ struct iproc_pcie { int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res); int iproc_pcie_remove(struct iproc_pcie *pcie); +int iproc_pcie_shutdown(struct iproc_pcie *pcie); #ifdef CONFIG_PCIE_IPROC_MSI int iproc_msi_init(struct iproc_pcie *pcie, struct device_node *node); diff --git a/drivers/pci/host/pcie-mediatek.c b/drivers/pci/host/pcie-mediatek.c index 5a9d8589ea0b..db93efdf1d63 100644 --- a/drivers/pci/host/pcie-mediatek.c +++ b/drivers/pci/host/pcie-mediatek.c @@ -3,6 +3,7 @@ * * Copyright (c) 2017 MediaTek Inc. * Author: Ryder Lee <ryder.lee@mediatek.com> + * Honghui Zhang <honghui.zhang@mediatek.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -16,6 +17,9 @@ #include <linux/clk.h> #include <linux/delay.h> +#include <linux/iopoll.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> #include <linux/kernel.h> #include <linux/of_address.h> #include <linux/of_pci.h> @@ -63,16 +67,104 @@ #define PCIE_FC_CREDIT_MASK (GENMASK(31, 31) | GENMASK(28, 16)) #define PCIE_FC_CREDIT_VAL(x) ((x) << 16) +/* PCIe V2 share registers */ +#define PCIE_SYS_CFG_V2 0x0 +#define PCIE_CSR_LTSSM_EN(x) BIT(0 + (x) * 8) +#define PCIE_CSR_ASPM_L1_EN(x) BIT(1 + (x) * 8) + +/* PCIe V2 per-port registers */ +#define PCIE_MSI_VECTOR 0x0c0 +#define PCIE_INT_MASK 0x420 +#define INTX_MASK GENMASK(19, 16) +#define INTX_SHIFT 16 +#define PCIE_INT_STATUS 0x424 +#define MSI_STATUS BIT(23) +#define PCIE_IMSI_STATUS 0x42c +#define PCIE_IMSI_ADDR 0x430 +#define MSI_MASK BIT(23) +#define MTK_MSI_IRQS_NUM 32 + +#define PCIE_AHB_TRANS_BASE0_L 0x438 +#define PCIE_AHB_TRANS_BASE0_H 0x43c +#define AHB2PCIE_SIZE(x) ((x) & GENMASK(4, 0)) +#define PCIE_AXI_WINDOW0 0x448 +#define WIN_ENABLE BIT(7) + +/* PCIe V2 configuration transaction header */ +#define PCIE_CFG_HEADER0 0x460 +#define PCIE_CFG_HEADER1 0x464 +#define PCIE_CFG_HEADER2 0x468 +#define PCIE_CFG_WDATA 0x470 +#define PCIE_APP_TLP_REQ 0x488 +#define PCIE_CFG_RDATA 0x48c +#define APP_CFG_REQ BIT(0) +#define APP_CPL_STATUS GENMASK(7, 5) + +#define CFG_WRRD_TYPE_0 4 +#define CFG_WR_FMT 2 +#define CFG_RD_FMT 0 + +#define CFG_DW0_LENGTH(length) ((length) & GENMASK(9, 0)) +#define CFG_DW0_TYPE(type) (((type) << 24) & GENMASK(28, 24)) +#define CFG_DW0_FMT(fmt) (((fmt) << 29) & GENMASK(31, 29)) +#define CFG_DW2_REGN(regn) ((regn) & GENMASK(11, 2)) +#define CFG_DW2_FUN(fun) (((fun) << 16) & GENMASK(18, 16)) +#define CFG_DW2_DEV(dev) (((dev) << 19) & GENMASK(23, 19)) +#define CFG_DW2_BUS(bus) (((bus) << 24) & GENMASK(31, 24)) +#define CFG_HEADER_DW0(type, fmt) \ + (CFG_DW0_LENGTH(1) | CFG_DW0_TYPE(type) | CFG_DW0_FMT(fmt)) +#define CFG_HEADER_DW1(where, size) \ + (GENMASK(((size) - 1), 0) << ((where) & 0x3)) +#define CFG_HEADER_DW2(regn, fun, dev, bus) \ + (CFG_DW2_REGN(regn) | CFG_DW2_FUN(fun) | \ + CFG_DW2_DEV(dev) | CFG_DW2_BUS(bus)) + +#define PCIE_RST_CTRL 0x510 +#define PCIE_PHY_RSTB BIT(0) +#define PCIE_PIPE_SRSTB BIT(1) +#define PCIE_MAC_SRSTB BIT(2) +#define PCIE_CRSTB BIT(3) +#define PCIE_PERSTB BIT(8) +#define PCIE_LINKDOWN_RST_EN GENMASK(15, 13) +#define PCIE_LINK_STATUS_V2 0x804 +#define PCIE_PORT_LINKUP_V2 BIT(10) + +struct mtk_pcie_port; + +/** + * struct mtk_pcie_soc - differentiate between host generations + * @has_msi: whether this host supports MSI interrupts or not + * @ops: pointer to configuration access functions + * @startup: pointer to controller setting functions + * @setup_irq: pointer to initialize IRQ functions + */ +struct mtk_pcie_soc { + bool has_msi; + struct pci_ops *ops; + int (*startup)(struct mtk_pcie_port *port); + int (*setup_irq)(struct mtk_pcie_port *port, struct device_node *node); +}; + /** * struct mtk_pcie_port - PCIe port information * @base: IO mapped register base * @list: port list * @pcie: pointer to PCIe host info * @reset: pointer to port reset control - * @sys_ck: pointer to bus clock - * @phy: pointer to phy control block + * @sys_ck: pointer to transaction/data link layer clock + * @ahb_ck: pointer to AHB slave interface operating clock for CSR access + * and RC initiated MMIO access + * @axi_ck: pointer to application layer MMIO channel operating clock + * @aux_ck: pointer to pe2_mac_bridge and pe2_mac_core operating clock + * when pcie_mac_ck/pcie_pipe_ck is turned off + * @obff_ck: pointer to OBFF functional block operating clock + * @pipe_ck: pointer to LTSSM and PHY/MAC layer operating clock + * @phy: pointer to PHY control block * @lane: lane count - * @index: port index + * @slot: port slot + * @irq_domain: legacy INTx IRQ domain + * @msi_domain: MSI IRQ domain + * @msi_irq_in_use: bit map for assigned MSI IRQ */ struct mtk_pcie_port { void __iomem *base; @@ -80,9 +172,17 @@ struct mtk_pcie_port { struct mtk_pcie *pcie; struct reset_control *reset; struct clk *sys_ck; + struct clk *ahb_ck; + struct clk *axi_ck; + struct clk *aux_ck; + struct clk *obff_ck; + struct clk *pipe_ck; struct phy *phy; u32 lane; - u32 index; + u32 slot; + struct irq_domain *irq_domain; + struct irq_domain *msi_domain; + DECLARE_BITMAP(msi_irq_in_use, MTK_MSI_IRQS_NUM); }; /** @@ -96,6 +196,7 @@ struct mtk_pcie_port { * @busn: bus range * @offset: IO / Memory offset * @ports: pointer to PCIe port information + * @soc: pointer to SoC-dependent operations */ struct mtk_pcie { struct device *dev; @@ -111,13 +212,9 @@ struct mtk_pcie { resource_size_t io; } offset; struct list_head ports; + const struct mtk_pcie_soc *soc; }; -static inline bool mtk_pcie_link_up(struct mtk_pcie_port *port) -{ - return !!(readl(port->base + PCIE_LINK_STATUS) & PCIE_PORT_LINKUP); -} - static void mtk_pcie_subsys_powerdown(struct mtk_pcie *pcie) { struct device *dev = pcie->dev; @@ -146,6 +243,12 @@ static void mtk_pcie_put_resources(struct mtk_pcie *pcie) list_for_each_entry_safe(port, tmp, &pcie->ports, list) { phy_power_off(port->phy); + phy_exit(port->phy); + clk_disable_unprepare(port->pipe_ck); + clk_disable_unprepare(port->obff_ck); + clk_disable_unprepare(port->axi_ck); + clk_disable_unprepare(port->aux_ck); + clk_disable_unprepare(port->ahb_ck); clk_disable_unprepare(port->sys_ck); mtk_pcie_port_free(port); } @@ -153,11 +256,412 @@ static void mtk_pcie_put_resources(struct mtk_pcie *pcie) mtk_pcie_subsys_powerdown(pcie); } +static int mtk_pcie_check_cfg_cpld(struct mtk_pcie_port *port) +{ + u32 val; + int err; + + err = readl_poll_timeout_atomic(port->base + PCIE_APP_TLP_REQ, val, + !(val & APP_CFG_REQ), 10, + 100 * USEC_PER_MSEC); + if (err) + return PCIBIOS_SET_FAILED; + + if (readl(port->base + PCIE_APP_TLP_REQ) & APP_CPL_STATUS) + return PCIBIOS_SET_FAILED; + + return PCIBIOS_SUCCESSFUL; +} + +static int mtk_pcie_hw_rd_cfg(struct mtk_pcie_port *port, u32 bus, u32 devfn, + int where, int size, u32 *val) +{ + u32 tmp; + + /* Write PCIe configuration transaction header for Cfgrd */ + writel(CFG_HEADER_DW0(CFG_WRRD_TYPE_0, CFG_RD_FMT), + port->base + PCIE_CFG_HEADER0); + writel(CFG_HEADER_DW1(where, size), port->base + PCIE_CFG_HEADER1); + writel(CFG_HEADER_DW2(where, PCI_FUNC(devfn), PCI_SLOT(devfn), bus), + port->base + PCIE_CFG_HEADER2); + + /* Trigger h/w to transmit Cfgrd TLP */ + tmp = readl(port->base + PCIE_APP_TLP_REQ); + tmp |= APP_CFG_REQ; + writel(tmp, port->base + PCIE_APP_TLP_REQ); + + /* Check completion status */ + if (mtk_pcie_check_cfg_cpld(port)) + return PCIBIOS_SET_FAILED; + + /* Read cpld payload of Cfgrd */ + *val = readl(port->base + PCIE_CFG_RDATA); + + if (size == 1) + *val = (*val >> (8 * (where & 3))) & 0xff; + else if (size == 2) + *val = (*val >> (8 * (where & 3))) & 0xffff; + + return PCIBIOS_SUCCESSFUL; +} + +static int mtk_pcie_hw_wr_cfg(struct mtk_pcie_port *port, u32 bus, u32 devfn, + int where, int size, u32 val) +{ + /* Write PCIe configuration transaction header for Cfgwr */ + writel(CFG_HEADER_DW0(CFG_WRRD_TYPE_0, CFG_WR_FMT), + port->base + PCIE_CFG_HEADER0); + writel(CFG_HEADER_DW1(where, size), port->base + PCIE_CFG_HEADER1); + writel(CFG_HEADER_DW2(where, PCI_FUNC(devfn), PCI_SLOT(devfn), bus), + port->base + PCIE_CFG_HEADER2); + + /* Write Cfgwr data */ + val = val << 8 * (where & 3); + writel(val, port->base + PCIE_CFG_WDATA); + + /* Trigger h/w to transmit Cfgwr TLP */ + val = readl(port->base + PCIE_APP_TLP_REQ); + val |= APP_CFG_REQ; + writel(val, port->base + PCIE_APP_TLP_REQ); + + /* Check completion status */ + return mtk_pcie_check_cfg_cpld(port); +} + +static struct mtk_pcie_port *mtk_pcie_find_port(struct pci_bus *bus, + unsigned int devfn) +{ + struct mtk_pcie *pcie = bus->sysdata; + struct mtk_pcie_port *port; + + list_for_each_entry(port, &pcie->ports, list) + if (port->slot == PCI_SLOT(devfn)) + return port; + + return NULL; +} + +static int mtk_pcie_config_read(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *val) +{ + struct mtk_pcie_port *port; + u32 bn = bus->number; + int ret; + + port = mtk_pcie_find_port(bus, devfn); + if (!port) { + *val = ~0; + return PCIBIOS_DEVICE_NOT_FOUND; + } + + ret = mtk_pcie_hw_rd_cfg(port, bn, devfn, where, size, val); + if (ret) + *val = ~0; + + return ret; +} + +static int mtk_pcie_config_write(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 val) +{ + struct mtk_pcie_port *port; + u32 bn = bus->number; + + port = mtk_pcie_find_port(bus, devfn); + if (!port) + return PCIBIOS_DEVICE_NOT_FOUND; + + return mtk_pcie_hw_wr_cfg(port, bn, devfn, where, size, val); +} + +static struct pci_ops mtk_pcie_ops_v2 = { + .read = mtk_pcie_config_read, + .write = mtk_pcie_config_write, +}; + +static int mtk_pcie_startup_port_v2(struct mtk_pcie_port *port) +{ + struct mtk_pcie *pcie = port->pcie; + struct resource *mem = &pcie->mem; + u32 val; + size_t size; + int err; + + /* MT7622 platforms need to enable LTSSM and ASPM from PCIe subsys */ + if (pcie->base) { + val = readl(pcie->base + PCIE_SYS_CFG_V2); + val |= PCIE_CSR_LTSSM_EN(port->slot) | + PCIE_CSR_ASPM_L1_EN(port->slot); + writel(val, pcie->base + PCIE_SYS_CFG_V2); + } + + /* Assert all reset signals */ + writel(0, port->base + PCIE_RST_CTRL); + + /* + * Enable PCIe link down reset, if link status changed from link up to + * link down, this will reset MAC control registers and configuration + * space. + */ + writel(PCIE_LINKDOWN_RST_EN, port->base + PCIE_RST_CTRL); + + /* De-assert PHY, PE, PIPE, MAC and configuration reset */ + val = readl(port->base + PCIE_RST_CTRL); + val |= PCIE_PHY_RSTB | PCIE_PERSTB | PCIE_PIPE_SRSTB | + PCIE_MAC_SRSTB | PCIE_CRSTB; + writel(val, port->base + PCIE_RST_CTRL); + + /* 100ms timeout value should be enough for Gen1/2 training */ + err = readl_poll_timeout(port->base + PCIE_LINK_STATUS_V2, val, + !!(val & PCIE_PORT_LINKUP_V2), 20, + 100 * USEC_PER_MSEC); + if (err) + return -ETIMEDOUT; + + /* Set INTx mask */ + val = readl(port->base + PCIE_INT_MASK); + val &= ~INTX_MASK; + writel(val, port->base + PCIE_INT_MASK); + + /* Set AHB to PCIe translation windows */ + size = mem->end - mem->start; + val = lower_32_bits(mem->start) | AHB2PCIE_SIZE(fls(size)); + writel(val, port->base + PCIE_AHB_TRANS_BASE0_L); + + val = upper_32_bits(mem->start); + writel(val, port->base + PCIE_AHB_TRANS_BASE0_H); + + /* Set PCIe to AXI translation memory space.*/ + val = fls(0xffffffff) | WIN_ENABLE; + writel(val, port->base + PCIE_AXI_WINDOW0); + + return 0; +} + +static int mtk_pcie_msi_alloc(struct mtk_pcie_port *port) +{ + int msi; + + msi = find_first_zero_bit(port->msi_irq_in_use, MTK_MSI_IRQS_NUM); + if (msi < MTK_MSI_IRQS_NUM) + set_bit(msi, port->msi_irq_in_use); + else + return -ENOSPC; + + return msi; +} + +static void mtk_pcie_msi_free(struct mtk_pcie_port *port, unsigned long hwirq) +{ + clear_bit(hwirq, port->msi_irq_in_use); +} + +static int mtk_pcie_msi_setup_irq(struct msi_controller *chip, + struct pci_dev *pdev, struct msi_desc *desc) +{ + struct mtk_pcie_port *port; + struct msi_msg msg; + unsigned int irq; + int hwirq; + phys_addr_t msg_addr; + + port = mtk_pcie_find_port(pdev->bus, pdev->devfn); + if (!port) + return -EINVAL; + + hwirq = mtk_pcie_msi_alloc(port); + if (hwirq < 0) + return hwirq; + + irq = irq_create_mapping(port->msi_domain, hwirq); + if (!irq) { + mtk_pcie_msi_free(port, hwirq); + return -EINVAL; + } + + chip->dev = &pdev->dev; + + irq_set_msi_desc(irq, desc); + + /* MT2712/MT7622 only support 32-bit MSI addresses */ + msg_addr = virt_to_phys(port->base + PCIE_MSI_VECTOR); + msg.address_hi = 0; + msg.address_lo = lower_32_bits(msg_addr); + msg.data = hwirq; + + pci_write_msi_msg(irq, &msg); + + return 0; +} + +static void mtk_msi_teardown_irq(struct msi_controller *chip, unsigned int irq) +{ + struct pci_dev *pdev = to_pci_dev(chip->dev); + struct irq_data *d = irq_get_irq_data(irq); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + struct mtk_pcie_port *port; + + port = mtk_pcie_find_port(pdev->bus, pdev->devfn); + if (!port) + return; + + irq_dispose_mapping(irq); + mtk_pcie_msi_free(port, hwirq); +} + +static struct msi_controller mtk_pcie_msi_chip = { + .setup_irq = mtk_pcie_msi_setup_irq, + .teardown_irq = mtk_msi_teardown_irq, +}; + +static struct irq_chip mtk_msi_irq_chip = { + .name = "MTK PCIe MSI", + .irq_enable = pci_msi_unmask_irq, + .irq_disable = pci_msi_mask_irq, + .irq_mask = pci_msi_mask_irq, + .irq_unmask = pci_msi_unmask_irq, +}; + +static int mtk_pcie_msi_map(struct irq_domain *domain, unsigned int irq, + irq_hw_number_t hwirq) +{ + irq_set_chip_and_handler(irq, &mtk_msi_irq_chip, handle_simple_irq); + irq_set_chip_data(irq, domain->host_data); + + return 0; +} + +static const struct irq_domain_ops msi_domain_ops = { + .map = mtk_pcie_msi_map, +}; + +static void mtk_pcie_enable_msi(struct mtk_pcie_port *port) +{ + u32 val; + phys_addr_t msg_addr; + + msg_addr = virt_to_phys(port->base + PCIE_MSI_VECTOR); + val = lower_32_bits(msg_addr); + writel(val, port->base + PCIE_IMSI_ADDR); + + val = readl(port->base + PCIE_INT_MASK); + val &= ~MSI_MASK; + writel(val, port->base + PCIE_INT_MASK); +} + +static int mtk_pcie_intx_map(struct irq_domain *domain, unsigned int irq, + irq_hw_number_t hwirq) +{ + irq_set_chip_and_handler(irq, &dummy_irq_chip, handle_simple_irq); + irq_set_chip_data(irq, domain->host_data); + + return 0; +} + +static const struct irq_domain_ops intx_domain_ops = { + .map = mtk_pcie_intx_map, +}; + +static int mtk_pcie_init_irq_domain(struct mtk_pcie_port *port, + struct device_node *node) +{ + struct device *dev = port->pcie->dev; + struct device_node *pcie_intc_node; + + /* Setup INTx */ + pcie_intc_node = of_get_next_child(node, NULL); + if (!pcie_intc_node) { + dev_err(dev, "no PCIe Intc node found\n"); + return -ENODEV; + } + + port->irq_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX, + &intx_domain_ops, port); + if (!port->irq_domain) { + dev_err(dev, "failed to get INTx IRQ domain\n"); + return -ENODEV; + } + + if (IS_ENABLED(CONFIG_PCI_MSI)) { + port->msi_domain = irq_domain_add_linear(node, MTK_MSI_IRQS_NUM, + &msi_domain_ops, + &mtk_pcie_msi_chip); + if (!port->msi_domain) { + dev_err(dev, "failed to create MSI IRQ domain\n"); + return -ENODEV; + } + mtk_pcie_enable_msi(port); + } + + return 0; +} + +static irqreturn_t mtk_pcie_intr_handler(int irq, void *data) +{ + struct mtk_pcie_port *port = (struct mtk_pcie_port *)data; + unsigned long status; + u32 virq; + u32 bit = INTX_SHIFT; + + while ((status = readl(port->base + PCIE_INT_STATUS)) & INTX_MASK) { + for_each_set_bit_from(bit, &status, PCI_NUM_INTX + INTX_SHIFT) { + /* Clear the INTx */ + writel(1 << bit, port->base + PCIE_INT_STATUS); + virq = irq_find_mapping(port->irq_domain, + bit - INTX_SHIFT); + generic_handle_irq(virq); + } + } + + if (IS_ENABLED(CONFIG_PCI_MSI)) { + while ((status = readl(port->base + PCIE_INT_STATUS)) & MSI_STATUS) { + unsigned long imsi_status; + + while ((imsi_status = readl(port->base + PCIE_IMSI_STATUS))) { + for_each_set_bit(bit, &imsi_status, MTK_MSI_IRQS_NUM) { + /* Clear the MSI */ + writel(1 << bit, port->base + PCIE_IMSI_STATUS); + virq = irq_find_mapping(port->msi_domain, bit); + generic_handle_irq(virq); + } + } + /* Clear MSI interrupt status */ + writel(MSI_STATUS, port->base + PCIE_INT_STATUS); + } + } + + return IRQ_HANDLED; +} + +static int mtk_pcie_setup_irq(struct mtk_pcie_port *port, + struct device_node *node) +{ + struct mtk_pcie *pcie = port->pcie; + struct device *dev = pcie->dev; + struct platform_device *pdev = to_platform_device(dev); + int err, irq; + + irq = platform_get_irq(pdev, port->slot); + err = devm_request_irq(dev, irq, mtk_pcie_intr_handler, + IRQF_SHARED, "mtk-pcie", port); + if (err) { + dev_err(dev, "unable to request IRQ %d\n", irq); + return err; + } + + err = mtk_pcie_init_irq_domain(port, node); + if (err) { + dev_err(dev, "failed to init PCIe IRQ domain\n"); + return err; + } + + return 0; +} + static void __iomem *mtk_pcie_map_bus(struct pci_bus *bus, unsigned int devfn, int where) { - struct pci_host_bridge *host = pci_find_host_bridge(bus); - struct mtk_pcie *pcie = pci_host_bridge_priv(host); + struct mtk_pcie *pcie = bus->sysdata; writel(PCIE_CONF_ADDR(where, PCI_FUNC(devfn), PCI_SLOT(devfn), bus->number), pcie->base + PCIE_CFG_ADDR); @@ -171,16 +675,34 @@ static struct pci_ops mtk_pcie_ops = { .write = pci_generic_config_write, }; -static void mtk_pcie_configure_rc(struct mtk_pcie_port *port) +static int mtk_pcie_startup_port(struct mtk_pcie_port *port) { struct mtk_pcie *pcie = port->pcie; - u32 func = PCI_FUNC(port->index << 3); - u32 slot = PCI_SLOT(port->index << 3); + u32 func = PCI_FUNC(port->slot << 3); + u32 slot = PCI_SLOT(port->slot << 3); u32 val; + int err; + + /* assert port PERST_N */ + val = readl(pcie->base + PCIE_SYS_CFG); + val |= PCIE_PORT_PERST(port->slot); + writel(val, pcie->base + PCIE_SYS_CFG); + + /* de-assert port PERST_N */ + val = readl(pcie->base + PCIE_SYS_CFG); + val &= ~PCIE_PORT_PERST(port->slot); + writel(val, pcie->base + PCIE_SYS_CFG); + + /* 100ms timeout value should be enough for Gen1/2 training */ + err = readl_poll_timeout(port->base + PCIE_LINK_STATUS, val, + !!(val & PCIE_PORT_LINKUP), 20, + 100 * USEC_PER_MSEC); + if (err) + return -ETIMEDOUT; /* enable interrupt */ val = readl(pcie->base + PCIE_INT_ENABLE); - val |= PCIE_PORT_INT_EN(port->index); + val |= PCIE_PORT_INT_EN(port->slot); writel(val, pcie->base + PCIE_INT_ENABLE); /* map to all DDR region. We need to set it before cfg operation. */ @@ -209,67 +731,94 @@ static void mtk_pcie_configure_rc(struct mtk_pcie_port *port) writel(PCIE_CONF_ADDR(PCIE_FTS_NUM, func, slot, 0), pcie->base + PCIE_CFG_ADDR); writel(val, pcie->base + PCIE_CFG_DATA); + + return 0; } -static void mtk_pcie_assert_ports(struct mtk_pcie_port *port) +static void mtk_pcie_enable_port(struct mtk_pcie_port *port) { struct mtk_pcie *pcie = port->pcie; - u32 val; + struct device *dev = pcie->dev; + int err; - /* assert port PERST_N */ - val = readl(pcie->base + PCIE_SYS_CFG); - val |= PCIE_PORT_PERST(port->index); - writel(val, pcie->base + PCIE_SYS_CFG); + err = clk_prepare_enable(port->sys_ck); + if (err) { + dev_err(dev, "failed to enable sys_ck%d clock\n", port->slot); + goto err_sys_clk; + } - /* de-assert port PERST_N */ - val = readl(pcie->base + PCIE_SYS_CFG); - val &= ~PCIE_PORT_PERST(port->index); - writel(val, pcie->base + PCIE_SYS_CFG); + err = clk_prepare_enable(port->ahb_ck); + if (err) { + dev_err(dev, "failed to enable ahb_ck%d\n", port->slot); + goto err_ahb_clk; + } - /* PCIe v2.0 need at least 100ms delay to train from Gen1 to Gen2 */ - msleep(100); -} + err = clk_prepare_enable(port->aux_ck); + if (err) { + dev_err(dev, "failed to enable aux_ck%d\n", port->slot); + goto err_aux_clk; + } -static void mtk_pcie_enable_ports(struct mtk_pcie_port *port) -{ - struct device *dev = port->pcie->dev; - int err; + err = clk_prepare_enable(port->axi_ck); + if (err) { + dev_err(dev, "failed to enable axi_ck%d\n", port->slot); + goto err_axi_clk; + } - err = clk_prepare_enable(port->sys_ck); + err = clk_prepare_enable(port->obff_ck); if (err) { - dev_err(dev, "failed to enable port%d clock\n", port->index); - goto err_sys_clk; + dev_err(dev, "failed to enable obff_ck%d\n", port->slot); + goto err_obff_clk; + } + + err = clk_prepare_enable(port->pipe_ck); + if (err) { + dev_err(dev, "failed to enable pipe_ck%d\n", port->slot); + goto err_pipe_clk; } reset_control_assert(port->reset); reset_control_deassert(port->reset); + err = phy_init(port->phy); + if (err) { + dev_err(dev, "failed to initialize port%d phy\n", port->slot); + goto err_phy_init; + } + err = phy_power_on(port->phy); if (err) { - dev_err(dev, "failed to power on port%d phy\n", port->index); + dev_err(dev, "failed to power on port%d phy\n", port->slot); goto err_phy_on; } - mtk_pcie_assert_ports(port); - - /* if link up, then setup root port configuration space */ - if (mtk_pcie_link_up(port)) { - mtk_pcie_configure_rc(port); + if (!pcie->soc->startup(port)) return; - } - dev_info(dev, "Port%d link down\n", port->index); + dev_info(dev, "Port%d link down\n", port->slot); phy_power_off(port->phy); err_phy_on: + phy_exit(port->phy); +err_phy_init: + clk_disable_unprepare(port->pipe_ck); +err_pipe_clk: + clk_disable_unprepare(port->obff_ck); +err_obff_clk: + clk_disable_unprepare(port->axi_ck); +err_axi_clk: + clk_disable_unprepare(port->aux_ck); +err_aux_clk: + clk_disable_unprepare(port->ahb_ck); +err_ahb_clk: clk_disable_unprepare(port->sys_ck); err_sys_clk: mtk_pcie_port_free(port); } -static int mtk_pcie_parse_ports(struct mtk_pcie *pcie, - struct device_node *node, - int index) +static int mtk_pcie_parse_port(struct mtk_pcie *pcie, + struct device_node *node, + int slot) { struct mtk_pcie_port *port; struct resource *regs; @@ -288,34 +837,87 @@ static int mtk_pcie_parse_ports(struct mtk_pcie *pcie, return err; } - regs = platform_get_resource(pdev, IORESOURCE_MEM, index + 1); + snprintf(name, sizeof(name), "port%d", slot); + regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, name); port->base = devm_ioremap_resource(dev, regs); if (IS_ERR(port->base)) { - dev_err(dev, "failed to map port%d base\n", index); + dev_err(dev, "failed to map port%d base\n", slot); return PTR_ERR(port->base); } - snprintf(name, sizeof(name), "sys_ck%d", index); + snprintf(name, sizeof(name), "sys_ck%d", slot); port->sys_ck = devm_clk_get(dev, name); if (IS_ERR(port->sys_ck)) { - dev_err(dev, "failed to get port%d clock\n", index); + dev_err(dev, "failed to get sys_ck%d clock\n", slot); return PTR_ERR(port->sys_ck); } - snprintf(name, sizeof(name), "pcie-rst%d", index); - port->reset = devm_reset_control_get_optional(dev, name); + /* sys_ck might be divided into the following parts in some chips */ + snprintf(name, sizeof(name), "ahb_ck%d", slot); + port->ahb_ck = devm_clk_get(dev, name); + if (IS_ERR(port->ahb_ck)) { + if (PTR_ERR(port->ahb_ck) == -EPROBE_DEFER) + return -EPROBE_DEFER; + + port->ahb_ck = NULL; + } + + snprintf(name, sizeof(name), "axi_ck%d", slot); + port->axi_ck = devm_clk_get(dev, name); + if (IS_ERR(port->axi_ck)) { + if (PTR_ERR(port->axi_ck) == -EPROBE_DEFER) + return -EPROBE_DEFER; + + port->axi_ck = NULL; + } + + snprintf(name, sizeof(name), "aux_ck%d", slot); + port->aux_ck = devm_clk_get(dev, name); + if (IS_ERR(port->aux_ck)) { + if (PTR_ERR(port->aux_ck) == -EPROBE_DEFER) + return -EPROBE_DEFER; + + port->aux_ck = NULL; + } + + snprintf(name, sizeof(name), "obff_ck%d", slot); + port->obff_ck = devm_clk_get(dev, name); + if (IS_ERR(port->obff_ck)) { + if (PTR_ERR(port->obff_ck) == -EPROBE_DEFER) + return -EPROBE_DEFER; + + port->obff_ck = NULL; + } + + snprintf(name, sizeof(name), "pipe_ck%d", slot); + port->pipe_ck = devm_clk_get(dev, name); + if (IS_ERR(port->pipe_ck)) { + if (PTR_ERR(port->pipe_ck) == -EPROBE_DEFER) + return -EPROBE_DEFER; + + port->pipe_ck = NULL; + } + + snprintf(name, sizeof(name), "pcie-rst%d", slot); + port->reset = devm_reset_control_get_optional_exclusive(dev, name); if (PTR_ERR(port->reset) == -EPROBE_DEFER) return PTR_ERR(port->reset); /* some platforms may use default PHY setting */ - snprintf(name, sizeof(name), "pcie-phy%d", index); + snprintf(name, sizeof(name), "pcie-phy%d", slot); port->phy = devm_phy_optional_get(dev, name); if (IS_ERR(port->phy)) return PTR_ERR(port->phy); - port->index = index; + port->slot = slot; port->pcie = pcie; + if (pcie->soc->setup_irq) { + err = pcie->soc->setup_irq(port, node); + if (err) + return err; + } + INIT_LIST_HEAD(&port->list); list_add_tail(&port->list, &pcie->ports); @@ -329,12 +931,14 @@ static int mtk_pcie_subsys_powerup(struct mtk_pcie *pcie) struct resource *regs; int err; - /* get shared registers */ - regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); - pcie->base = devm_ioremap_resource(dev, regs); - if (IS_ERR(pcie->base)) { - dev_err(dev, "failed to map shared register\n"); - return PTR_ERR(pcie->base); + /* get shared registers, which are optional */ + regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "subsys"); + if (regs) { + pcie->base = devm_ioremap_resource(dev, regs); + if (IS_ERR(pcie->base)) { + dev_err(dev, "failed to map shared register\n"); + return PTR_ERR(pcie->base); + } } pcie->free_ck = devm_clk_get(dev, "free_ck"); @@ -422,7 +1026,7 @@ static int mtk_pcie_setup(struct mtk_pcie *pcie) } for_each_available_child_of_node(node, child) { - int index; + int slot; err = of_pci_get_devfn(child); if (err < 0) { @@ -430,9 +1034,9 @@ static int mtk_pcie_setup(struct mtk_pcie *pcie) return err; } - index = PCI_SLOT(err); + slot = PCI_SLOT(err); - err = mtk_pcie_parse_ports(pcie, child, index); + err = mtk_pcie_parse_port(pcie, child, slot); if (err) return err; } @@ -443,7 +1047,7 @@ static int mtk_pcie_setup(struct mtk_pcie *pcie) /* enable each port, and then check link status */ list_for_each_entry_safe(port, tmp, &pcie->ports, list) - mtk_pcie_enable_ports(port); + mtk_pcie_enable_port(port); /* power down PCIe subsys if slots are all empty (link down) */ if (list_empty(&pcie->ports)) @@ -480,9 +1084,12 @@ static int mtk_pcie_register_host(struct pci_host_bridge *host) host->busnr = pcie->busn.start; host->dev.parent = pcie->dev; - host->ops = &mtk_pcie_ops; + host->ops = pcie->soc->ops; host->map_irq = of_irq_parse_and_map_pci; host->swizzle_irq = pci_common_swizzle; + host->sysdata = pcie; + if (IS_ENABLED(CONFIG_PCI_MSI) && pcie->soc->has_msi) + host->msi = &mtk_pcie_msi_chip; err = pci_scan_root_bus_bridge(host); if (err < 0) @@ -513,6 +1120,7 @@ static int mtk_pcie_probe(struct platform_device *pdev) pcie = pci_host_bridge_priv(host); pcie->dev = dev; + pcie->soc = of_device_get_match_data(dev); platform_set_drvdata(pdev, pcie); INIT_LIST_HEAD(&pcie->ports); @@ -537,9 +1145,23 @@ put_resources: return err; } +static const struct mtk_pcie_soc mtk_pcie_soc_v1 = { + .ops = &mtk_pcie_ops, + .startup = mtk_pcie_startup_port, +}; + +static const struct mtk_pcie_soc mtk_pcie_soc_v2 = { + .has_msi = true, + .ops = &mtk_pcie_ops_v2, + .startup = mtk_pcie_startup_port_v2, + .setup_irq = mtk_pcie_setup_irq, +}; + static const struct of_device_id mtk_pcie_ids[] = { - { .compatible = "mediatek,mt7623-pcie"}, - { .compatible = "mediatek,mt2701-pcie"}, + { .compatible = "mediatek,mt2701-pcie", .data = &mtk_pcie_soc_v1 }, + { .compatible = "mediatek,mt7623-pcie", .data = &mtk_pcie_soc_v1 }, + { .compatible = "mediatek,mt2712-pcie", .data = &mtk_pcie_soc_v2 }, + { .compatible = "mediatek,mt7622-pcie", .data = &mtk_pcie_soc_v2 }, {}, }; diff --git a/drivers/pci/host/pcie-rcar.c b/drivers/pci/host/pcie-rcar.c index 246d485b24c6..4e0b25d09b0c 100644 --- a/drivers/pci/host/pcie-rcar.c +++ b/drivers/pci/host/pcie-rcar.c @@ -471,10 +471,8 @@ static int rcar_pcie_enable(struct rcar_pcie *pcie) bridge->msi = &pcie->msi.chip; ret = pci_scan_root_bus_bridge(bridge); - if (ret < 0) { - kfree(bridge); + if (ret < 0) return ret; - } bus = bridge->bus; @@ -1190,14 +1188,16 @@ static int rcar_pcie_probe(struct platform_device *pdev) return 0; -err_free_bridge: - pci_free_host_bridge(bridge); - err_pm_put: pm_runtime_put(dev); err_pm_disable: pm_runtime_disable(dev); + +err_free_bridge: + pci_free_host_bridge(bridge); + pci_free_resource_list(&pcie->resources); + return err; } diff --git a/drivers/pci/host/pcie-rockchip.c b/drivers/pci/host/pcie-rockchip.c index 7bb9870f6d8c..9051c6c8fea4 100644 --- a/drivers/pci/host/pcie-rockchip.c +++ b/drivers/pci/host/pcie-rockchip.c @@ -6,7 +6,7 @@ * Author: Shawn Lin <shawn.lin@rock-chips.com> * Wenrui Li <wenrui.li@rock-chips.com> * - * Bits taken from Synopsys Designware Host controller driver and + * Bits taken from Synopsys DesignWare Host controller driver and * ARM PCI Host generic driver. * * This program is free software: you can redistribute it and/or modify @@ -15,6 +15,7 @@ * (at your option) any later version. */ +#include <linux/bitrev.h> #include <linux/clk.h> #include <linux/delay.h> #include <linux/gpio/consumer.h> @@ -47,6 +48,7 @@ #define HIWORD_UPDATE_BIT(val) HIWORD_UPDATE(val, val) #define ENCODE_LANES(x) ((((x) >> 1) & 3) << 4) +#define MAX_LANE_NUM 4 #define PCIE_CLIENT_BASE 0x0 #define PCIE_CLIENT_CONFIG (PCIE_CLIENT_BASE + 0x00) @@ -111,6 +113,9 @@ #define PCIE_CORE_TXCREDIT_CFG1_MUI_SHIFT 16 #define PCIE_CORE_TXCREDIT_CFG1_MUI_ENCODE(x) \ (((x) >> 3) << PCIE_CORE_TXCREDIT_CFG1_MUI_SHIFT) +#define PCIE_CORE_LANE_MAP (PCIE_CORE_CTRL_MGMT_BASE + 0x200) +#define PCIE_CORE_LANE_MAP_MASK 0x0000000f +#define PCIE_CORE_LANE_MAP_REVERSE BIT(16) #define PCIE_CORE_INT_STATUS (PCIE_CORE_CTRL_MGMT_BASE + 0x20c) #define PCIE_CORE_INT_PRFPE BIT(0) #define PCIE_CORE_INT_CRFPE BIT(1) @@ -210,7 +215,8 @@ struct rockchip_pcie { void __iomem *reg_base; /* DT axi-base */ void __iomem *apb_base; /* DT apb-base */ - struct phy *phy; + bool legacy_phy; + struct phy *phys[MAX_LANE_NUM]; struct reset_control *core_rst; struct reset_control *mgmt_rst; struct reset_control *mgmt_sticky_rst; @@ -222,11 +228,13 @@ struct rockchip_pcie { struct clk *aclk_perf_pcie; struct clk *hclk_pcie; struct clk *clk_pcie_pm; + struct regulator *vpcie12v; /* 12V power supply */ struct regulator *vpcie3v3; /* 3.3V power supply */ struct regulator *vpcie1v8; /* 1.8V power supply */ struct regulator *vpcie0v9; /* 0.9V power supply */ struct gpio_desc *ep_gpio; u32 lanes; + u8 lanes_map; u8 root_bus_nr; int link_gen; struct device *dev; @@ -299,6 +307,24 @@ static int rockchip_pcie_valid_device(struct rockchip_pcie *rockchip, return 1; } +static u8 rockchip_pcie_lane_map(struct rockchip_pcie *rockchip) +{ + u32 val; + u8 map; + + if (rockchip->legacy_phy) + return GENMASK(MAX_LANE_NUM - 1, 0); + + val = rockchip_pcie_read(rockchip, PCIE_CORE_LANE_MAP); + map = val & PCIE_CORE_LANE_MAP_MASK; + + /* The link may be using a reverse-indexed mapping. */ + if (val & PCIE_CORE_LANE_MAP_REVERSE) + map = bitrev8(map) >> 4; + + return map; +} + static int rockchip_pcie_rd_own_conf(struct rockchip_pcie *rockchip, int where, int size, u32 *val) { @@ -514,10 +540,10 @@ static void rockchip_pcie_set_power_limit(struct rockchip_pcie *rockchip) static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip) { struct device *dev = rockchip->dev; - int err; + int err, i; u32 status; - gpiod_set_value(rockchip->ep_gpio, 0); + gpiod_set_value_cansleep(rockchip->ep_gpio, 0); err = reset_control_assert(rockchip->aclk_rst); if (err) { @@ -537,34 +563,36 @@ static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip) return err; } - err = phy_init(rockchip->phy); - if (err < 0) { - dev_err(dev, "fail to init phy, err %d\n", err); - return err; + for (i = 0; i < MAX_LANE_NUM; i++) { + err = phy_init(rockchip->phys[i]); + if (err) { + dev_err(dev, "init phy%d err %d\n", i, err); + goto err_exit_phy; + } } err = reset_control_assert(rockchip->core_rst); if (err) { dev_err(dev, "assert core_rst err %d\n", err); - return err; + goto err_exit_phy; } err = reset_control_assert(rockchip->mgmt_rst); if (err) { dev_err(dev, "assert mgmt_rst err %d\n", err); - return err; + goto err_exit_phy; } err = reset_control_assert(rockchip->mgmt_sticky_rst); if (err) { dev_err(dev, "assert mgmt_sticky_rst err %d\n", err); - return err; + goto err_exit_phy; } err = reset_control_assert(rockchip->pipe_rst); if (err) { dev_err(dev, "assert pipe_rst err %d\n", err); - return err; + goto err_exit_phy; } udelay(10); @@ -572,19 +600,19 @@ static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip) err = reset_control_deassert(rockchip->pm_rst); if (err) { dev_err(dev, "deassert pm_rst err %d\n", err); - return err; + goto err_exit_phy; } err = reset_control_deassert(rockchip->aclk_rst); if (err) { dev_err(dev, "deassert aclk_rst err %d\n", err); - return err; + goto err_exit_phy; } err = reset_control_deassert(rockchip->pclk_rst); if (err) { dev_err(dev, "deassert pclk_rst err %d\n", err); - return err; + goto err_exit_phy; } if (rockchip->link_gen == 2) @@ -602,10 +630,12 @@ static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip) PCIE_CLIENT_MODE_RC, PCIE_CLIENT_CONFIG); - err = phy_power_on(rockchip->phy); - if (err) { - dev_err(dev, "fail to power on phy, err %d\n", err); - return err; + for (i = 0; i < MAX_LANE_NUM; i++) { + err = phy_power_on(rockchip->phys[i]); + if (err) { + dev_err(dev, "power on phy%d err %d\n", i, err); + goto err_power_off_phy; + } } /* @@ -615,25 +645,25 @@ static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip) err = reset_control_deassert(rockchip->mgmt_sticky_rst); if (err) { dev_err(dev, "deassert mgmt_sticky_rst err %d\n", err); - return err; + goto err_power_off_phy; } err = reset_control_deassert(rockchip->core_rst); if (err) { dev_err(dev, "deassert core_rst err %d\n", err); - return err; + goto err_power_off_phy; } err = reset_control_deassert(rockchip->mgmt_rst); if (err) { dev_err(dev, "deassert mgmt_rst err %d\n", err); - return err; + goto err_power_off_phy; } err = reset_control_deassert(rockchip->pipe_rst); if (err) { dev_err(dev, "deassert pipe_rst err %d\n", err); - return err; + goto err_power_off_phy; } /* Fix the transmitted FTS count desired to exit from L0s. */ @@ -658,7 +688,7 @@ static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip) rockchip_pcie_write(rockchip, PCIE_CLIENT_LINK_TRAIN_ENABLE, PCIE_CLIENT_CONFIG); - gpiod_set_value(rockchip->ep_gpio, 1); + gpiod_set_value_cansleep(rockchip->ep_gpio, 1); /* 500ms timeout value should be enough for Gen1/2 training */ err = readl_poll_timeout(rockchip->apb_base + PCIE_CLIENT_BASIC_STATUS1, @@ -666,7 +696,7 @@ static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip) 500 * USEC_PER_MSEC); if (err) { dev_err(dev, "PCIe link training gen1 timeout!\n"); - return -ETIMEDOUT; + goto err_power_off_phy; } if (rockchip->link_gen == 2) { @@ -691,6 +721,15 @@ static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip) PCIE_CORE_PL_CONF_LANE_SHIFT); dev_dbg(dev, "current link width is x%d\n", status); + /* Power off unused lane(s) */ + rockchip->lanes_map = rockchip_pcie_lane_map(rockchip); + for (i = 0; i < MAX_LANE_NUM; i++) { + if (!(rockchip->lanes_map & BIT(i))) { + dev_dbg(dev, "idling lane %d\n", i); + phy_power_off(rockchip->phys[i]); + } + } + rockchip_pcie_write(rockchip, ROCKCHIP_VENDOR_ID, PCIE_CORE_CONFIG_VENDOR); rockchip_pcie_write(rockchip, @@ -715,6 +754,26 @@ static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip) rockchip_pcie_write(rockchip, status, PCIE_RC_CONFIG_DCSR); return 0; +err_power_off_phy: + while (i--) + phy_power_off(rockchip->phys[i]); + i = MAX_LANE_NUM; +err_exit_phy: + while (i--) + phy_exit(rockchip->phys[i]); + return err; +} + +static void rockchip_pcie_deinit_phys(struct rockchip_pcie *rockchip) +{ + int i; + + for (i = 0; i < MAX_LANE_NUM; i++) { + /* inactive lanes are already powered off */ + if (rockchip->lanes_map & BIT(i)) + phy_power_off(rockchip->phys[i]); + phy_exit(rockchip->phys[i]); + } } static irqreturn_t rockchip_pcie_subsys_irq_handler(int irq, void *arg) @@ -853,6 +912,91 @@ static void rockchip_pcie_legacy_int_handler(struct irq_desc *desc) chained_irq_exit(chip, desc); } +static int rockchip_pcie_get_phys(struct rockchip_pcie *rockchip) +{ + struct device *dev = rockchip->dev; + struct phy *phy; + char *name; + u32 i; + + phy = devm_phy_get(dev, "pcie-phy"); + if (!IS_ERR(phy)) { + rockchip->legacy_phy = true; + rockchip->phys[0] = phy; + dev_warn(dev, "legacy phy model is deprecated!\n"); + return 0; + } + + if (PTR_ERR(phy) == -EPROBE_DEFER) + return PTR_ERR(phy); + + dev_dbg(dev, "missing legacy phy; search for per-lane PHY\n"); + + for (i = 0; i < MAX_LANE_NUM; i++) { + name = kasprintf(GFP_KERNEL, "pcie-phy-%u", i); + if (!name) + return -ENOMEM; + + phy = devm_of_phy_get(dev, dev->of_node, name); + kfree(name); + + if (IS_ERR(phy)) { + if (PTR_ERR(phy) != -EPROBE_DEFER) + dev_err(dev, "missing phy for lane %d: %ld\n", + i, PTR_ERR(phy)); + return PTR_ERR(phy); + } + + rockchip->phys[i] = phy; + } + + return 0; +} + +static int rockchip_pcie_setup_irq(struct rockchip_pcie *rockchip) +{ + int irq, err; + struct device *dev = rockchip->dev; + struct platform_device *pdev = to_platform_device(dev); + + irq = platform_get_irq_byname(pdev, "sys"); + if (irq < 0) { + dev_err(dev, "missing sys IRQ resource\n"); + return irq; + } + + err = devm_request_irq(dev, irq, rockchip_pcie_subsys_irq_handler, + IRQF_SHARED, "pcie-sys", rockchip); + if (err) { + dev_err(dev, "failed to request PCIe subsystem IRQ\n"); + return err; + } + + irq = platform_get_irq_byname(pdev, "legacy"); + if (irq < 0) { + dev_err(dev, "missing legacy IRQ resource\n"); + return irq; + } + + irq_set_chained_handler_and_data(irq, + rockchip_pcie_legacy_int_handler, + rockchip); + + irq = platform_get_irq_byname(pdev, "client"); + if (irq < 0) { + dev_err(dev, "missing client IRQ resource\n"); + return irq; + } + + err = devm_request_irq(dev, irq, rockchip_pcie_client_irq_handler, + IRQF_SHARED, "pcie-client", rockchip); + if (err) { + dev_err(dev, "failed to request PCIe client IRQ\n"); + return err; + } + + return 0; +} /** * rockchip_pcie_parse_dt - Parse Device Tree @@ -866,7 +1010,6 @@ static int rockchip_pcie_parse_dt(struct rockchip_pcie *rockchip) struct platform_device *pdev = to_platform_device(dev); struct device_node *node = dev->of_node; struct resource *regs; - int irq; int err; regs = platform_get_resource_byname(pdev, @@ -883,12 +1026,9 @@ static int rockchip_pcie_parse_dt(struct rockchip_pcie *rockchip) if (IS_ERR(rockchip->apb_base)) return PTR_ERR(rockchip->apb_base); - rockchip->phy = devm_phy_get(dev, "pcie-phy"); - if (IS_ERR(rockchip->phy)) { - if (PTR_ERR(rockchip->phy) != -EPROBE_DEFER) - dev_err(dev, "missing phy\n"); - return PTR_ERR(rockchip->phy); - } + err = rockchip_pcie_get_phys(rockchip); + if (err) + return err; rockchip->lanes = 1; err = of_property_read_u32(node, "num-lanes", &rockchip->lanes); @@ -903,49 +1043,50 @@ static int rockchip_pcie_parse_dt(struct rockchip_pcie *rockchip) if (rockchip->link_gen < 0 || rockchip->link_gen > 2) rockchip->link_gen = 2; - rockchip->core_rst = devm_reset_control_get(dev, "core"); + rockchip->core_rst = devm_reset_control_get_exclusive(dev, "core"); if (IS_ERR(rockchip->core_rst)) { if (PTR_ERR(rockchip->core_rst) != -EPROBE_DEFER) dev_err(dev, "missing core reset property in node\n"); return PTR_ERR(rockchip->core_rst); } - rockchip->mgmt_rst = devm_reset_control_get(dev, "mgmt"); + rockchip->mgmt_rst = devm_reset_control_get_exclusive(dev, "mgmt"); if (IS_ERR(rockchip->mgmt_rst)) { if (PTR_ERR(rockchip->mgmt_rst) != -EPROBE_DEFER) dev_err(dev, "missing mgmt reset property in node\n"); return PTR_ERR(rockchip->mgmt_rst); } - rockchip->mgmt_sticky_rst = devm_reset_control_get(dev, "mgmt-sticky"); + rockchip->mgmt_sticky_rst = devm_reset_control_get_exclusive(dev, + "mgmt-sticky"); if (IS_ERR(rockchip->mgmt_sticky_rst)) { if (PTR_ERR(rockchip->mgmt_sticky_rst) != -EPROBE_DEFER) dev_err(dev, "missing mgmt-sticky reset property in node\n"); return PTR_ERR(rockchip->mgmt_sticky_rst); } - rockchip->pipe_rst = devm_reset_control_get(dev, "pipe"); + rockchip->pipe_rst = devm_reset_control_get_exclusive(dev, "pipe"); if (IS_ERR(rockchip->pipe_rst)) { if (PTR_ERR(rockchip->pipe_rst) != -EPROBE_DEFER) dev_err(dev, "missing pipe reset property in node\n"); return PTR_ERR(rockchip->pipe_rst); } - rockchip->pm_rst = devm_reset_control_get(dev, "pm"); + rockchip->pm_rst = devm_reset_control_get_exclusive(dev, "pm"); if (IS_ERR(rockchip->pm_rst)) { if (PTR_ERR(rockchip->pm_rst) != -EPROBE_DEFER) dev_err(dev, "missing pm reset property in node\n"); return PTR_ERR(rockchip->pm_rst); } - rockchip->pclk_rst = devm_reset_control_get(dev, "pclk"); + rockchip->pclk_rst = devm_reset_control_get_exclusive(dev, "pclk"); if (IS_ERR(rockchip->pclk_rst)) { if (PTR_ERR(rockchip->pclk_rst) != -EPROBE_DEFER) dev_err(dev, "missing pclk reset property in node\n"); return PTR_ERR(rockchip->pclk_rst); } - rockchip->aclk_rst = devm_reset_control_get(dev, "aclk"); + rockchip->aclk_rst = devm_reset_control_get_exclusive(dev, "aclk"); if (IS_ERR(rockchip->aclk_rst)) { if (PTR_ERR(rockchip->aclk_rst) != -EPROBE_DEFER) dev_err(dev, "missing aclk reset property in node\n"); @@ -982,40 +1123,15 @@ static int rockchip_pcie_parse_dt(struct rockchip_pcie *rockchip) return PTR_ERR(rockchip->clk_pcie_pm); } - irq = platform_get_irq_byname(pdev, "sys"); - if (irq < 0) { - dev_err(dev, "missing sys IRQ resource\n"); - return -EINVAL; - } - - err = devm_request_irq(dev, irq, rockchip_pcie_subsys_irq_handler, - IRQF_SHARED, "pcie-sys", rockchip); - if (err) { - dev_err(dev, "failed to request PCIe subsystem IRQ\n"); + err = rockchip_pcie_setup_irq(rockchip); + if (err) return err; - } - - irq = platform_get_irq_byname(pdev, "legacy"); - if (irq < 0) { - dev_err(dev, "missing legacy IRQ resource\n"); - return -EINVAL; - } - - irq_set_chained_handler_and_data(irq, - rockchip_pcie_legacy_int_handler, - rockchip); - - irq = platform_get_irq_byname(pdev, "client"); - if (irq < 0) { - dev_err(dev, "missing client IRQ resource\n"); - return -EINVAL; - } - err = devm_request_irq(dev, irq, rockchip_pcie_client_irq_handler, - IRQF_SHARED, "pcie-client", rockchip); - if (err) { - dev_err(dev, "failed to request PCIe client IRQ\n"); - return err; + rockchip->vpcie12v = devm_regulator_get_optional(dev, "vpcie12v"); + if (IS_ERR(rockchip->vpcie12v)) { + if (PTR_ERR(rockchip->vpcie12v) == -EPROBE_DEFER) + return -EPROBE_DEFER; + dev_info(dev, "no vpcie12v regulator found\n"); } rockchip->vpcie3v3 = devm_regulator_get_optional(dev, "vpcie3v3"); @@ -1047,11 +1163,19 @@ static int rockchip_pcie_set_vpcie(struct rockchip_pcie *rockchip) struct device *dev = rockchip->dev; int err; + if (!IS_ERR(rockchip->vpcie12v)) { + err = regulator_enable(rockchip->vpcie12v); + if (err) { + dev_err(dev, "fail to enable vpcie12v regulator\n"); + goto err_out; + } + } + if (!IS_ERR(rockchip->vpcie3v3)) { err = regulator_enable(rockchip->vpcie3v3); if (err) { dev_err(dev, "fail to enable vpcie3v3 regulator\n"); - goto err_out; + goto err_disable_12v; } } @@ -1079,6 +1203,9 @@ err_disable_1v8: err_disable_3v3: if (!IS_ERR(rockchip->vpcie3v3)) regulator_disable(rockchip->vpcie3v3); +err_disable_12v: + if (!IS_ERR(rockchip->vpcie12v)) + regulator_disable(rockchip->vpcie12v); err_out: return err; } @@ -1116,7 +1243,7 @@ static int rockchip_pcie_init_irq_domain(struct rockchip_pcie *rockchip) return -EINVAL; } - rockchip->irq_domain = irq_domain_add_linear(intc, 4, + rockchip->irq_domain = irq_domain_add_linear(intc, PCI_NUM_INTX, &intx_domain_ops, rockchip); if (!rockchip->irq_domain) { dev_err(dev, "failed to get a INTx IRQ domain\n"); @@ -1270,6 +1397,56 @@ static int rockchip_pcie_wait_l2(struct rockchip_pcie *rockchip) return 0; } +static int rockchip_pcie_enable_clocks(struct rockchip_pcie *rockchip) +{ + struct device *dev = rockchip->dev; + int err; + + err = clk_prepare_enable(rockchip->aclk_pcie); + if (err) { + dev_err(dev, "unable to enable aclk_pcie clock\n"); + return err; + } + + err = clk_prepare_enable(rockchip->aclk_perf_pcie); + if (err) { + dev_err(dev, "unable to enable aclk_perf_pcie clock\n"); + goto err_aclk_perf_pcie; + } + + err = clk_prepare_enable(rockchip->hclk_pcie); + if (err) { + dev_err(dev, "unable to enable hclk_pcie clock\n"); + goto err_hclk_pcie; + } + + err = clk_prepare_enable(rockchip->clk_pcie_pm); + if (err) { + dev_err(dev, "unable to enable clk_pcie_pm clock\n"); + goto err_clk_pcie_pm; + } + + return 0; + +err_clk_pcie_pm: + clk_disable_unprepare(rockchip->hclk_pcie); +err_hclk_pcie: + clk_disable_unprepare(rockchip->aclk_perf_pcie); +err_aclk_perf_pcie: + clk_disable_unprepare(rockchip->aclk_pcie); + return err; +} + +static void rockchip_pcie_disable_clocks(void *data) +{ + struct rockchip_pcie *rockchip = data; + + clk_disable_unprepare(rockchip->clk_pcie_pm); + clk_disable_unprepare(rockchip->hclk_pcie); + clk_disable_unprepare(rockchip->aclk_perf_pcie); + clk_disable_unprepare(rockchip->aclk_pcie); +} + static int __maybe_unused rockchip_pcie_suspend_noirq(struct device *dev) { struct rockchip_pcie *rockchip = dev_get_drvdata(dev); @@ -1286,13 +1463,9 @@ static int __maybe_unused rockchip_pcie_suspend_noirq(struct device *dev) return ret; } - phy_power_off(rockchip->phy); - phy_exit(rockchip->phy); + rockchip_pcie_deinit_phys(rockchip); - clk_disable_unprepare(rockchip->clk_pcie_pm); - clk_disable_unprepare(rockchip->hclk_pcie); - clk_disable_unprepare(rockchip->aclk_perf_pcie); - clk_disable_unprepare(rockchip->aclk_pcie); + rockchip_pcie_disable_clocks(rockchip); if (!IS_ERR(rockchip->vpcie0v9)) regulator_disable(rockchip->vpcie0v9); @@ -1313,21 +1486,9 @@ static int __maybe_unused rockchip_pcie_resume_noirq(struct device *dev) } } - err = clk_prepare_enable(rockchip->clk_pcie_pm); + err = rockchip_pcie_enable_clocks(rockchip); if (err) - goto err_pcie_pm; - - err = clk_prepare_enable(rockchip->hclk_pcie); - if (err) - goto err_hclk_pcie; - - err = clk_prepare_enable(rockchip->aclk_perf_pcie); - if (err) - goto err_aclk_perf_pcie; - - err = clk_prepare_enable(rockchip->aclk_pcie); - if (err) - goto err_aclk_pcie; + goto err_disable_0v9; err = rockchip_pcie_init_port(rockchip); if (err) @@ -1335,7 +1496,7 @@ static int __maybe_unused rockchip_pcie_resume_noirq(struct device *dev) err = rockchip_pcie_cfg_atu(rockchip); if (err) - goto err_pcie_resume; + goto err_err_deinit_port; /* Need this to enter L1 again */ rockchip_pcie_update_txcredit_mui(rockchip); @@ -1343,15 +1504,13 @@ static int __maybe_unused rockchip_pcie_resume_noirq(struct device *dev) return 0; +err_err_deinit_port: + rockchip_pcie_deinit_phys(rockchip); err_pcie_resume: - clk_disable_unprepare(rockchip->aclk_pcie); -err_aclk_pcie: - clk_disable_unprepare(rockchip->aclk_perf_pcie); -err_aclk_perf_pcie: - clk_disable_unprepare(rockchip->hclk_pcie); -err_hclk_pcie: - clk_disable_unprepare(rockchip->clk_pcie_pm); -err_pcie_pm: + rockchip_pcie_disable_clocks(rockchip); +err_disable_0v9: + if (!IS_ERR(rockchip->vpcie0v9)) + regulator_disable(rockchip->vpcie0v9); return err; } @@ -1385,29 +1544,9 @@ static int rockchip_pcie_probe(struct platform_device *pdev) if (err) return err; - err = clk_prepare_enable(rockchip->aclk_pcie); - if (err) { - dev_err(dev, "unable to enable aclk_pcie clock\n"); - goto err_aclk_pcie; - } - - err = clk_prepare_enable(rockchip->aclk_perf_pcie); - if (err) { - dev_err(dev, "unable to enable aclk_perf_pcie clock\n"); - goto err_aclk_perf_pcie; - } - - err = clk_prepare_enable(rockchip->hclk_pcie); - if (err) { - dev_err(dev, "unable to enable hclk_pcie clock\n"); - goto err_hclk_pcie; - } - - err = clk_prepare_enable(rockchip->clk_pcie_pm); - if (err) { - dev_err(dev, "unable to enable hclk_pcie clock\n"); - goto err_pcie_pm; - } + err = rockchip_pcie_enable_clocks(rockchip); + if (err) + return err; err = rockchip_pcie_set_vpcie(rockchip); if (err) { @@ -1423,12 +1562,12 @@ static int rockchip_pcie_probe(struct platform_device *pdev) err = rockchip_pcie_init_irq_domain(rockchip); if (err < 0) - goto err_vpcie; + goto err_deinit_port; err = of_pci_get_host_bridge_resources(dev->of_node, 0, 0xff, &res, &io_base); if (err) - goto err_vpcie; + goto err_remove_irq_domain; err = devm_request_pci_bus_resources(dev, &res); if (err) @@ -1466,12 +1605,12 @@ static int rockchip_pcie_probe(struct platform_device *pdev) err = rockchip_pcie_cfg_atu(rockchip); if (err) - goto err_free_res; + goto err_unmap_iospace; rockchip->msg_region = devm_ioremap(dev, rockchip->msg_bus_addr, SZ_1M); if (!rockchip->msg_region) { err = -ENOMEM; - goto err_free_res; + goto err_unmap_iospace; } list_splice_init(&res, &bridge->windows); @@ -1484,7 +1623,7 @@ static int rockchip_pcie_probe(struct platform_device *pdev) err = pci_scan_root_bus_bridge(bridge); if (err < 0) - goto err_free_res; + goto err_unmap_iospace; bus = bridge->bus; @@ -1498,9 +1637,17 @@ static int rockchip_pcie_probe(struct platform_device *pdev) pci_bus_add_devices(bus); return 0; +err_unmap_iospace: + pci_unmap_iospace(rockchip->io); err_free_res: pci_free_resource_list(&res); +err_remove_irq_domain: + irq_domain_remove(rockchip->irq_domain); +err_deinit_port: + rockchip_pcie_deinit_phys(rockchip); err_vpcie: + if (!IS_ERR(rockchip->vpcie12v)) + regulator_disable(rockchip->vpcie12v); if (!IS_ERR(rockchip->vpcie3v3)) regulator_disable(rockchip->vpcie3v3); if (!IS_ERR(rockchip->vpcie1v8)) @@ -1508,14 +1655,7 @@ err_vpcie: if (!IS_ERR(rockchip->vpcie0v9)) regulator_disable(rockchip->vpcie0v9); err_set_vpcie: - clk_disable_unprepare(rockchip->clk_pcie_pm); -err_pcie_pm: - clk_disable_unprepare(rockchip->hclk_pcie); -err_hclk_pcie: - clk_disable_unprepare(rockchip->aclk_perf_pcie); -err_aclk_perf_pcie: - clk_disable_unprepare(rockchip->aclk_pcie); -err_aclk_pcie: + rockchip_pcie_disable_clocks(rockchip); return err; } @@ -1529,14 +1669,12 @@ static int rockchip_pcie_remove(struct platform_device *pdev) pci_unmap_iospace(rockchip->io); irq_domain_remove(rockchip->irq_domain); - phy_power_off(rockchip->phy); - phy_exit(rockchip->phy); + rockchip_pcie_deinit_phys(rockchip); - clk_disable_unprepare(rockchip->clk_pcie_pm); - clk_disable_unprepare(rockchip->hclk_pcie); - clk_disable_unprepare(rockchip->aclk_perf_pcie); - clk_disable_unprepare(rockchip->aclk_pcie); + rockchip_pcie_disable_clocks(rockchip); + if (!IS_ERR(rockchip->vpcie12v)) + regulator_disable(rockchip->vpcie12v); if (!IS_ERR(rockchip->vpcie3v3)) regulator_disable(rockchip->vpcie3v3); if (!IS_ERR(rockchip->vpcie1v8)) diff --git a/drivers/pci/host/pcie-xilinx-nwl.c b/drivers/pci/host/pcie-xilinx-nwl.c index eec641a34fc5..65dea98b2643 100644 --- a/drivers/pci/host/pcie-xilinx-nwl.c +++ b/drivers/pci/host/pcie-xilinx-nwl.c @@ -133,7 +133,6 @@ #define CFG_DMA_REG_BAR GENMASK(2, 0) #define INT_PCI_MSI_NR (2 * 32) -#define INTX_NUM 4 /* Readin the PS_LINKUP */ #define PS_LINKUP_OFFSET 0x00000238 @@ -334,9 +333,8 @@ static void nwl_pcie_leg_handler(struct irq_desc *desc) while ((status = nwl_bridge_readl(pcie, MSGF_LEG_STATUS) & MSGF_LEG_SR_MASKALL) != 0) { - for_each_set_bit(bit, &status, INTX_NUM) { - virq = irq_find_mapping(pcie->legacy_irq_domain, - bit + 1); + for_each_set_bit(bit, &status, PCI_NUM_INTX) { + virq = irq_find_mapping(pcie->legacy_irq_domain, bit); if (virq) generic_handle_irq(virq); } @@ -436,6 +434,7 @@ static int nwl_legacy_map(struct irq_domain *domain, unsigned int irq, static const struct irq_domain_ops legacy_domain_ops = { .map = nwl_legacy_map, + .xlate = pci_irqd_intx_xlate, }; #ifdef CONFIG_PCI_MSI @@ -559,7 +558,7 @@ static int nwl_pcie_init_irq_domain(struct nwl_pcie *pcie) } pcie->legacy_irq_domain = irq_domain_add_linear(legacy_intc_node, - INTX_NUM, + PCI_NUM_INTX, &legacy_domain_ops, pcie); @@ -813,7 +812,7 @@ static int nwl_pcie_parse_dt(struct nwl_pcie *pcie, pcie->irq_intx = platform_get_irq_byname(pdev, "intx"); if (pcie->irq_intx < 0) { dev_err(dev, "failed to get intx IRQ %d\n", pcie->irq_intx); - return -EINVAL; + return pcie->irq_intx; } irq_set_chained_handler_and_data(pcie->irq_intx, diff --git a/drivers/pci/host/pcie-xilinx.c b/drivers/pci/host/pcie-xilinx.c index f63fa5e0278c..94e13cb8608f 100644 --- a/drivers/pci/host/pcie-xilinx.c +++ b/drivers/pci/host/pcie-xilinx.c @@ -5,7 +5,7 @@ * * Based on the Tegra PCIe driver * - * Bits taken from Synopsys Designware Host controller driver and + * Bits taken from Synopsys DesignWare Host controller driver and * ARM PCI Host generic driver. * * This program is free software: you can redistribute it and/or modify @@ -60,6 +60,7 @@ #define XILINX_PCIE_INTR_MST_SLVERR BIT(27) #define XILINX_PCIE_INTR_MST_ERRP BIT(28) #define XILINX_PCIE_IMR_ALL_MASK 0x1FF30FED +#define XILINX_PCIE_IMR_ENABLE_MASK 0x1FF30F0D #define XILINX_PCIE_IDR_ALL_MASK 0xFFFFFFFF /* Root Port Error FIFO Read Register definitions */ @@ -369,6 +370,7 @@ static int xilinx_pcie_intx_map(struct irq_domain *domain, unsigned int irq, /* INTx IRQ Domain operations */ static const struct irq_domain_ops intx_domain_ops = { .map = xilinx_pcie_intx_map, + .xlate = pci_irqd_intx_xlate, }; /* PCIe HW Functions */ @@ -384,7 +386,7 @@ static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data) { struct xilinx_pcie_port *port = (struct xilinx_pcie_port *)data; struct device *dev = port->dev; - u32 val, mask, status, msi_data; + u32 val, mask, status; /* Read interrupt decode and mask registers */ val = pcie_read(port, XILINX_PCIE_REG_IDR); @@ -424,8 +426,7 @@ static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data) xilinx_pcie_clear_err_interrupts(port); } - if (status & XILINX_PCIE_INTR_INTX) { - /* INTx interrupt received */ + if (status & (XILINX_PCIE_INTR_INTX | XILINX_PCIE_INTR_MSI)) { val = pcie_read(port, XILINX_PCIE_REG_RPIFR1); /* Check whether interrupt valid */ @@ -434,41 +435,24 @@ static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data) goto error; } - if (!(val & XILINX_PCIE_RPIFR1_MSI_INTR)) { - /* Clear interrupt FIFO register 1 */ - pcie_write(port, XILINX_PCIE_RPIFR1_ALL_MASK, - XILINX_PCIE_REG_RPIFR1); - - /* Handle INTx Interrupt */ - val = ((val & XILINX_PCIE_RPIFR1_INTR_MASK) >> - XILINX_PCIE_RPIFR1_INTR_SHIFT) + 1; - generic_handle_irq(irq_find_mapping(port->leg_domain, - val)); - } - } - - if (status & XILINX_PCIE_INTR_MSI) { - /* MSI Interrupt */ - val = pcie_read(port, XILINX_PCIE_REG_RPIFR1); - - if (!(val & XILINX_PCIE_RPIFR1_INTR_VALID)) { - dev_warn(dev, "RP Intr FIFO1 read error\n"); - goto error; - } - + /* Decode the IRQ number */ if (val & XILINX_PCIE_RPIFR1_MSI_INTR) { - msi_data = pcie_read(port, XILINX_PCIE_REG_RPIFR2) & - XILINX_PCIE_RPIFR2_MSG_DATA; + val = pcie_read(port, XILINX_PCIE_REG_RPIFR2) & + XILINX_PCIE_RPIFR2_MSG_DATA; + } else { + val = (val & XILINX_PCIE_RPIFR1_INTR_MASK) >> + XILINX_PCIE_RPIFR1_INTR_SHIFT; + val = irq_find_mapping(port->leg_domain, val); + } - /* Clear interrupt FIFO register 1 */ - pcie_write(port, XILINX_PCIE_RPIFR1_ALL_MASK, - XILINX_PCIE_REG_RPIFR1); + /* Clear interrupt FIFO register 1 */ + pcie_write(port, XILINX_PCIE_RPIFR1_ALL_MASK, + XILINX_PCIE_REG_RPIFR1); - if (IS_ENABLED(CONFIG_PCI_MSI)) { - /* Handle MSI Interrupt */ - generic_handle_irq(msi_data); - } - } + /* Handle the interrupt */ + if (IS_ENABLED(CONFIG_PCI_MSI) || + !(val & XILINX_PCIE_RPIFR1_MSI_INTR)) + generic_handle_irq(val); } if (status & XILINX_PCIE_INTR_SLV_UNSUPP) @@ -524,7 +508,7 @@ static int xilinx_pcie_init_irq_domain(struct xilinx_pcie_port *port) return -ENODEV; } - port->leg_domain = irq_domain_add_linear(pcie_intc_node, 4, + port->leg_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX, &intx_domain_ops, port); if (!port->leg_domain) { @@ -571,8 +555,8 @@ static void xilinx_pcie_init_port(struct xilinx_pcie_port *port) XILINX_PCIE_IMR_ALL_MASK, XILINX_PCIE_REG_IDR); - /* Enable all interrupts */ - pcie_write(port, XILINX_PCIE_IMR_ALL_MASK, XILINX_PCIE_REG_IMR); + /* Enable all interrupts we handle */ + pcie_write(port, XILINX_PCIE_IMR_ENABLE_MASK, XILINX_PCIE_REG_IMR); /* Enable the Bridge enable bit */ pcie_write(port, pcie_read(port, XILINX_PCIE_REG_RPSC) | diff --git a/drivers/pci/host/vmd.c b/drivers/pci/host/vmd.c index 6088c3083194..509893bc3e63 100644 --- a/drivers/pci/host/vmd.c +++ b/drivers/pci/host/vmd.c @@ -183,7 +183,7 @@ static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *d int i, best = 1; unsigned long flags; - if (!desc->msi_attrib.is_msix || vmd->msix_count == 1) + if (pci_is_bridge(msi_desc_to_pci_dev(desc)) || vmd->msix_count == 1) return &vmd->irqs[0]; raw_spin_lock_irqsave(&list_lock, flags); @@ -697,7 +697,7 @@ static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id) return -ENODEV; vmd->msix_count = pci_alloc_irq_vectors(dev, 1, vmd->msix_count, - PCI_IRQ_MSIX | PCI_IRQ_AFFINITY); + PCI_IRQ_MSIX); if (vmd->msix_count < 0) return vmd->msix_count; @@ -755,6 +755,11 @@ static void vmd_remove(struct pci_dev *dev) static int vmd_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); + struct vmd_dev *vmd = pci_get_drvdata(pdev); + int i; + + for (i = 0; i < vmd->msix_count; i++) + devm_free_irq(dev, pci_irq_vector(pdev, i), &vmd->irqs[i]); pci_save_state(pdev); return 0; @@ -763,6 +768,16 @@ static int vmd_suspend(struct device *dev) static int vmd_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); + struct vmd_dev *vmd = pci_get_drvdata(pdev); + int err, i; + + for (i = 0; i < vmd->msix_count; i++) { + err = devm_request_irq(dev, pci_irq_vector(pdev, i), + vmd_irq, IRQF_NO_THREAD, + "vmd", &vmd->irqs[i]); + if (err) + return err; + } pci_restore_state(pdev); return 0; diff --git a/drivers/pci/hotplug/cpcihp_zt5550.c b/drivers/pci/hotplug/cpcihp_zt5550.c index 5f49c3fd736a..2f8659a148f5 100644 --- a/drivers/pci/hotplug/cpcihp_zt5550.c +++ b/drivers/pci/hotplug/cpcihp_zt5550.c @@ -280,7 +280,7 @@ static void zt5550_hc_remove_one(struct pci_dev *pdev) } -static struct pci_device_id zt5550_hc_pci_tbl[] = { +static const struct pci_device_id zt5550_hc_pci_tbl[] = { { PCI_VENDOR_ID_ZIATECH, PCI_DEVICE_ID_ZIATECH_5550_HC, PCI_ANY_ID, PCI_ANY_ID, }, { 0, } }; diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c index 33d300d12411..4d06b8461255 100644 --- a/drivers/pci/hotplug/cpqphp_core.c +++ b/drivers/pci/hotplug/cpqphp_core.c @@ -1417,7 +1417,7 @@ static void __exit unload_cpqphpd(void) iounmap(smbios_start); } -static struct pci_device_id hpcd_pci_tbl[] = { +static const struct pci_device_id hpcd_pci_tbl[] = { { /* handle any PCI Hotplug controller */ .class = ((PCI_CLASS_SYSTEM_PCI_HOTPLUG << 8) | 0x00), diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c index 5efd01d84498..73cf84645c82 100644 --- a/drivers/pci/hotplug/ibmphp_core.c +++ b/drivers/pci/hotplug/ibmphp_core.c @@ -852,7 +852,7 @@ static int set_bus(struct slot *slot_cur) u8 speed; u8 cmd = 0x0; int retval; - static struct pci_device_id ciobx[] = { + static const struct pci_device_id ciobx[] = { { PCI_DEVICE(PCI_VENDOR_ID_SERVERWORKS, 0x0101) }, { }, }; diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c index 43e345ac296b..a6a4dac798e5 100644 --- a/drivers/pci/hotplug/ibmphp_ebda.c +++ b/drivers/pci/hotplug/ibmphp_ebda.c @@ -1153,7 +1153,7 @@ void ibmphp_free_ebda_pci_rsrc_queue(void) } } -static struct pci_device_id id_table[] = { +static const struct pci_device_id id_table[] = { { .vendor = PCI_VENDOR_ID_IBM, .device = HPC_DEVICE_ID, diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 026830a138ae..e5d5ce9e3010 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -586,6 +586,14 @@ static irqreturn_t pciehp_isr(int irq, void *dev_id) events = status & (PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD | PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_CC | PCI_EXP_SLTSTA_DLLSC); + + /* + * If we've already reported a power fault, don't report it again + * until we've done something to handle it. + */ + if (ctrl->power_fault_detected) + events &= ~PCI_EXP_SLTSTA_PFD; + if (!events) return IRQ_NONE; diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 7c203198b582..74f6a17e4614 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -163,8 +163,8 @@ static void pnv_php_detach_device_nodes(struct device_node *parent) of_node_put(dn); refcount = kref_read(&dn->kobj.kref); if (refcount != 1) - pr_warn("Invalid refcount %d on <%s>\n", - refcount, of_node_full_name(dn)); + pr_warn("Invalid refcount %d on <%pOF>\n", + refcount, dn); of_detach_node(dn); } diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c index 3f93a4e79595..a3449d717a99 100644 --- a/drivers/pci/hotplug/rpadlpar_core.c +++ b/drivers/pci/hotplug/rpadlpar_core.c @@ -150,8 +150,8 @@ static void dlpar_pci_add_bus(struct device_node *dn) /* Add EADS device to PHB bus, adding new entry to bus->devices */ dev = of_create_pci_dev(dn, phb->bus, pdn->devfn); if (!dev) { - printk(KERN_ERR "%s: failed to create pci dev for %s\n", - __func__, dn->full_name); + printk(KERN_ERR "%s: failed to create pci dev for %pOF\n", + __func__, dn); return; } diff --git a/drivers/pci/hotplug/rpadlpar_sysfs.c b/drivers/pci/hotplug/rpadlpar_sysfs.c index a796301ea03f..edb5d8a53020 100644 --- a/drivers/pci/hotplug/rpadlpar_sysfs.c +++ b/drivers/pci/hotplug/rpadlpar_sysfs.c @@ -102,7 +102,7 @@ static struct attribute *default_attrs[] = { NULL, }; -static struct attribute_group dlpar_attr_group = { +static const struct attribute_group dlpar_attr_group = { .attrs = default_attrs, }; diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c index 8d132024f06e..1e29abaaea08 100644 --- a/drivers/pci/hotplug/rpaphp_core.c +++ b/drivers/pci/hotplug/rpaphp_core.c @@ -318,7 +318,7 @@ int rpaphp_add_slot(struct device_node *dn) if (!is_php_dn(dn, &indexes, &names, &types, &power_domains)) return 0; - dbg("Entry %s: dn->full_name=%s\n", __func__, dn->full_name); + dbg("Entry %s: dn=%pOF\n", __func__, dn); /* register PCI devices */ name = (char *) &names[1]; diff --git a/drivers/pci/hotplug/rpaphp_pci.c b/drivers/pci/hotplug/rpaphp_pci.c index ea41ea1d3c00..32aabc533be8 100644 --- a/drivers/pci/hotplug/rpaphp_pci.c +++ b/drivers/pci/hotplug/rpaphp_pci.c @@ -95,7 +95,7 @@ int rpaphp_enable_slot(struct slot *slot) bus = pci_find_bus_by_node(slot->dn); if (!bus) { - err("%s: no pci_bus for dn %s\n", __func__, slot->dn->full_name); + err("%s: no pci_bus for dn %pOF\n", __func__, slot->dn); return -EINVAL; } @@ -125,7 +125,7 @@ int rpaphp_enable_slot(struct slot *slot) if (rpaphp_debug) { struct pci_dev *dev; - dbg("%s: pci_devs of slot[%s]\n", __func__, slot->dn->full_name); + dbg("%s: pci_devs of slot[%pOF]\n", __func__, slot->dn); list_for_each_entry(dev, &bus->devices, bus_list) dbg("\t%s\n", pci_name(dev)); } diff --git a/drivers/pci/hotplug/rpaphp_slot.c b/drivers/pci/hotplug/rpaphp_slot.c index 388c4d8fcdd1..489862360f2c 100644 --- a/drivers/pci/hotplug/rpaphp_slot.c +++ b/drivers/pci/hotplug/rpaphp_slot.c @@ -122,8 +122,8 @@ int rpaphp_register_slot(struct slot *slot) int retval; int slotno = -1; - dbg("%s registering slot:path[%s] index[%x], name[%s] pdomain[%x] type[%d]\n", - __func__, slot->dn->full_name, slot->index, slot->name, + dbg("%s registering slot:path[%pOF] index[%x], name[%s] pdomain[%x] type[%d]\n", + __func__, slot->dn, slot->index, slot->name, slot->power_domain, slot->type); /* should not try to register the same slot twice */ diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c index 3454dc7385f1..7bfb87bd2b7e 100644 --- a/drivers/pci/hotplug/shpchp_core.c +++ b/drivers/pci/hotplug/shpchp_core.c @@ -351,7 +351,7 @@ static void shpc_remove(struct pci_dev *dev) kfree(ctrl); } -static struct pci_device_id shpcd_pci_tbl[] = { +static const struct pci_device_id shpcd_pci_tbl[] = { {PCI_DEVICE_CLASS(((PCI_CLASS_BRIDGE_PCI << 8) | 0x00), ~0)}, { /* end: all zeroes */ } }; diff --git a/drivers/pci/hotplug/shpchp_hpc.c b/drivers/pci/hotplug/shpchp_hpc.c index de0ea474fb73..e5824c7b7b6b 100644 --- a/drivers/pci/hotplug/shpchp_hpc.c +++ b/drivers/pci/hotplug/shpchp_hpc.c @@ -1062,6 +1062,8 @@ int shpc_init(struct controller *ctrl, struct pci_dev *pdev) if (rc) { ctrl_info(ctrl, "Can't get msi for the hotplug controller\n"); ctrl_info(ctrl, "Use INTx for the hotplug controller\n"); + } else { + pci_set_master(pdev); } rc = request_irq(ctrl->pci_dev->irq, shpc_isr, IRQF_SHARED, diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 120485d6f352..ac41c8be9200 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -331,7 +331,6 @@ failed: while (i--) pci_iov_remove_virtfn(dev, i, 0); - pcibios_sriov_disable(dev); err_pcibios: iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); pci_cfg_access_lock(dev); @@ -339,6 +338,8 @@ err_pcibios: ssleep(1); pci_cfg_access_unlock(dev); + pcibios_sriov_disable(dev); + if (iov->link != dev->devfn) sysfs_remove_link(&dev->dev.kobj, "dep_link"); @@ -357,14 +358,14 @@ static void sriov_disable(struct pci_dev *dev) for (i = 0; i < iov->num_VFs; i++) pci_iov_remove_virtfn(dev, i, 0); - pcibios_sriov_disable(dev); - iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); pci_cfg_access_lock(dev); pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); ssleep(1); pci_cfg_access_unlock(dev); + pcibios_sriov_disable(dev); + if (iov->link != dev->devfn) sysfs_remove_link(&dev->dev.kobj, "dep_link"); diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 2225afc1cbbb..496ed9130600 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1451,13 +1451,30 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, } EXPORT_SYMBOL_GPL(pci_msi_create_irq_domain); +/* + * Users of the generic MSI infrastructure expect a device to have a single ID, + * so with DMA aliases we have to pick the least-worst compromise. Devices with + * DMA phantom functions tend to still emit MSIs from the real function number, + * so we ignore those and only consider topological aliases where either the + * alias device or RID appears on a different bus number. We also make the + * reasonable assumption that bridges are walked in an upstream direction (so + * the last one seen wins), and the much braver assumption that the most likely + * case is that of PCI->PCIe so we should always use the alias RID. This echoes + * the logic from intel_irq_remapping's set_msi_sid(), which presumably works + * well enough in practice; in the face of the horrible PCIe<->PCI-X conditions + * for taking ownership all we can really do is close our eyes and hope... + */ static int get_msi_id_cb(struct pci_dev *pdev, u16 alias, void *data) { u32 *pa = data; + u8 bus = PCI_BUS_NUM(*pa); + + if (pdev->bus->number != bus || PCI_BUS_NUM(alias) != bus) + *pa = alias; - *pa = alias; return 0; } + /** * pci_msi_domain_get_msi_rid - Get the MSI requester id (RID) * @domain: The interrupt domain @@ -1471,7 +1488,7 @@ static int get_msi_id_cb(struct pci_dev *pdev, u16 alias, void *data) u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev) { struct device_node *of_node; - u32 rid = 0; + u32 rid = PCI_DEVID(pdev->bus->number, pdev->devfn); pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid); @@ -1487,14 +1504,14 @@ u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev) * @pdev: The PCI device * * Use the firmware data to find a device-specific MSI domain - * (i.e. not one that is ste as a default). + * (i.e. not one that is set as a default). * - * Returns: The coresponding MSI domain or NULL if none has been found. + * Returns: The corresponding MSI domain or NULL if none has been found. */ struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev) { struct irq_domain *dom; - u32 rid = 0; + u32 rid = PCI_DEVID(pdev->bus->number, pdev->devfn); pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid); dom = of_msi_map_get_device_domain(&pdev->dev, rid); diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c index a7a41d9c29df..7e9e79575d93 100644 --- a/drivers/pci/pci-label.c +++ b/drivers/pci/pci-label.c @@ -123,7 +123,7 @@ static struct attribute *smbios_attributes[] = { NULL, }; -static struct attribute_group smbios_attr_group = { +static const struct attribute_group smbios_attr_group = { .attrs = smbios_attributes, .is_visible = smbios_instance_string_exist, }; @@ -260,7 +260,7 @@ static struct attribute *acpi_attributes[] = { NULL, }; -static struct attribute_group acpi_attr_group = { +static const struct attribute_group acpi_attr_group = { .attrs = acpi_attributes, .is_visible = acpi_index_string_exist, }; diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 2f3780b50723..1eecfa301f7f 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -556,9 +556,9 @@ static ssize_t devspec_show(struct device *dev, struct pci_dev *pdev = to_pci_dev(dev); struct device_node *np = pci_device_to_OF_node(pdev); - if (np == NULL || np->full_name == NULL) + if (np == NULL) return 0; - return sprintf(buf, "%s", np->full_name); + return sprintf(buf, "%pOF", np); } static DEVICE_ATTR_RO(devspec); #endif @@ -1211,11 +1211,8 @@ static ssize_t pci_resource_io(struct file *filp, struct kobject *kobj, { struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj)); int bar = (unsigned long)attr->private; - struct resource *res; unsigned long port = off; - res = &pdev->resource[bar]; - port += pci_resource_start(pdev, bar); if (port > pci_resource_end(pdev, bar)) @@ -1431,7 +1428,7 @@ static ssize_t pci_read_rom(struct file *filp, struct kobject *kobj, return count; } -static struct bin_attribute pci_config_attr = { +static const struct bin_attribute pci_config_attr = { .attr = { .name = "config", .mode = S_IRUGO | S_IWUSR, @@ -1441,7 +1438,7 @@ static struct bin_attribute pci_config_attr = { .write = pci_write_config, }; -static struct bin_attribute pcie_config_attr = { +static const struct bin_attribute pcie_config_attr = { .attr = { .name = "config", .mode = S_IRUGO | S_IWUSR, @@ -1735,7 +1732,7 @@ const struct attribute_group *pcie_dev_groups[] = { NULL, }; -static struct attribute_group pci_dev_hp_attr_group = { +static const struct attribute_group pci_dev_hp_attr_group = { .attrs = pci_dev_hp_attrs, .is_visible = pci_dev_hp_attrs_are_visible, }; @@ -1759,23 +1756,23 @@ static umode_t sriov_attrs_are_visible(struct kobject *kobj, return a->mode; } -static struct attribute_group sriov_dev_attr_group = { +static const struct attribute_group sriov_dev_attr_group = { .attrs = sriov_dev_attrs, .is_visible = sriov_attrs_are_visible, }; #endif /* CONFIG_PCI_IOV */ -static struct attribute_group pci_dev_attr_group = { +static const struct attribute_group pci_dev_attr_group = { .attrs = pci_dev_dev_attrs, .is_visible = pci_dev_attrs_are_visible, }; -static struct attribute_group pci_bridge_attr_group = { +static const struct attribute_group pci_bridge_attr_group = { .attrs = pci_bridge_attrs, .is_visible = pci_bridge_attrs_are_visible, }; -static struct attribute_group pcie_dev_attr_group = { +static const struct attribute_group pcie_dev_attr_group = { .attrs = pcie_dev_attrs, .is_visible = pcie_dev_attrs_are_visible, }; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 68e3b2b0da93..b0002daa50f3 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -52,6 +52,7 @@ static void pci_pme_list_scan(struct work_struct *work); static LIST_HEAD(pci_pme_list); static DEFINE_MUTEX(pci_pme_list_mutex); static DECLARE_DELAYED_WORK(pci_pme_work, pci_pme_list_scan); +static DEFINE_MUTEX(pci_bridge_mutex); struct pci_pme_device { struct list_head list; @@ -892,7 +893,9 @@ EXPORT_SYMBOL_GPL(__pci_complete_power_transition); * -EINVAL if the requested state is invalid. * -EIO if device does not support PCI PM or its PM capabilities register has a * wrong version, or device doesn't support the requested state. + * 0 if the transition is to D1 or D2 but D1 and D2 are not supported. * 0 if device already is in the requested state. + * 0 if the transition is to D3 but D3 is not supported. * 0 if device's power state has been successfully changed. */ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) @@ -1348,10 +1351,16 @@ static void pci_enable_bridge(struct pci_dev *dev) if (bridge) pci_enable_bridge(bridge); + /* + * Hold pci_bridge_mutex to prevent a race when enabling two + * devices below the bridge simultaneously. The race may cause a + * PCI_COMMAND_MEMORY update to be lost (see changelog). + */ + mutex_lock(&pci_bridge_mutex); if (pci_is_enabled(dev)) { if (!dev->is_busmaster) pci_set_master(dev); - return; + goto end; } retval = pci_enable_device(dev); @@ -1359,6 +1368,8 @@ static void pci_enable_bridge(struct pci_dev *dev) dev_err(&dev->dev, "Error enabling bridge (%d), continuing\n", retval); pci_set_master(dev); +end: + mutex_unlock(&pci_bridge_mutex); } static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags) @@ -1383,7 +1394,7 @@ static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags) return 0; /* already enabled */ bridge = pci_upstream_bridge(dev); - if (bridge) + if (bridge && !pci_is_enabled(bridge)) pci_enable_bridge(bridge); /* only skip sriov related */ @@ -3818,27 +3829,49 @@ int pci_wait_for_pending_transaction(struct pci_dev *dev) } EXPORT_SYMBOL(pci_wait_for_pending_transaction); -/* - * We should only need to wait 100ms after FLR, but some devices take longer. - * Wait for up to 1000ms for config space to return something other than -1. - * Intel IGD requires this when an LCD panel is attached. We read the 2nd - * dword because VFs don't implement the 1st dword. - */ static void pci_flr_wait(struct pci_dev *dev) { - int i = 0; + int delay = 1, timeout = 60000; u32 id; - do { - msleep(100); + /* + * Per PCIe r3.1, sec 6.6.2, a device must complete an FLR within + * 100ms, but may silently discard requests while the FLR is in + * progress. Wait 100ms before trying to access the device. + */ + msleep(100); + + /* + * After 100ms, the device should not silently discard config + * requests, but it may still indicate that it needs more time by + * responding to them with CRS completions. The Root Port will + * generally synthesize ~0 data to complete the read (except when + * CRS SV is enabled and the read was for the Vendor ID; in that + * case it synthesizes 0x0001 data). + * + * Wait for the device to return a non-CRS completion. Read the + * Command register instead of Vendor ID so we don't have to + * contend with the CRS SV value. + */ + pci_read_config_dword(dev, PCI_COMMAND, &id); + while (id == ~0) { + if (delay > timeout) { + dev_warn(&dev->dev, "not ready %dms after FLR; giving up\n", + 100 + delay - 1); + return; + } + + if (delay > 1000) + dev_info(&dev->dev, "not ready %dms after FLR; waiting\n", + 100 + delay - 1); + + msleep(delay); + delay *= 2; pci_read_config_dword(dev, PCI_COMMAND, &id); - } while (i++ < 10 && id == ~0); + } - if (id == ~0) - dev_warn(&dev->dev, "Failed to return from FLR\n"); - else if (i > 1) - dev_info(&dev->dev, "Required additional %dms to return from FLR\n", - (i - 1) * 100); + if (delay > 1000) + dev_info(&dev->dev, "ready %dms after FLR\n", 100 + delay - 1); } /** @@ -5405,8 +5438,8 @@ static int of_pci_bus_find_domain_nr(struct device *parent) use_dt_domains = 0; domain = pci_get_new_domain_nr(); } else { - dev_err(parent, "Node %s has inconsistent \"linux,pci-domain\" property in DT\n", - parent->of_node->full_name); + dev_err(parent, "Node %pOF has inconsistent \"linux,pci-domain\" property in DT\n", + parent->of_node); domain = -1; } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 22e061738c6f..a6560c9baa52 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -235,6 +235,7 @@ enum pci_bar_type { pci_bar_mem64, /* A 64-bit memory BAR */ }; +int pci_configure_extended_tags(struct pci_dev *dev, void *ign); bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *pl, int crs_timeout); int pci_setup_device(struct pci_dev *dev); diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index dea186a9d6b6..6ff5f5b4f5e6 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -32,16 +32,9 @@ static int aer_probe(struct pcie_device *dev); static void aer_remove(struct pcie_device *dev); -static pci_ers_result_t aer_error_detected(struct pci_dev *dev, - enum pci_channel_state error); static void aer_error_resume(struct pci_dev *dev); static pci_ers_result_t aer_root_reset(struct pci_dev *dev); -static const struct pci_error_handlers aer_error_handlers = { - .error_detected = aer_error_detected, - .resume = aer_error_resume, -}; - static struct pcie_port_service_driver aerdriver = { .name = "aer", .port_type = PCI_EXP_TYPE_ROOT_PORT, @@ -49,9 +42,7 @@ static struct pcie_port_service_driver aerdriver = { .probe = aer_probe, .remove = aer_remove, - - .err_handler = &aer_error_handlers, - + .error_resume = aer_error_resume, .reset_link = aer_root_reset, }; @@ -350,20 +341,6 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) } /** - * aer_error_detected - update severity status - * @dev: pointer to Root Port's pci_dev data structure - * @error: error severity being notified by port bus - * - * Invoked by Port Bus driver during error recovery. - */ -static pci_ers_result_t aer_error_detected(struct pci_dev *dev, - enum pci_channel_state error) -{ - /* Root Port has no impact. Always recovers. */ - return PCI_ERS_RESULT_CAN_RECOVER; -} - -/** * aer_error_resume - clean up corresponding error status bits * @dev: pointer to Root Port's pci_dev data structure * diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index b1303b32053f..890efcc574cb 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -5,10 +5,10 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * This file implements the core part of PCI-Express AER. When an pci-express + * This file implements the core part of PCIe AER. When a PCIe * error is delivered, an error message will be collected and printed to * console, then, an error recovery procedure will be executed by following - * the pci error recovery rules. + * the PCI error recovery rules. * * Copyright (C) 2006 Intel Corp. * Tom Long Nguyen (tom.l.nguyen@intel.com) diff --git a/drivers/pci/pcie/pcie-dpc.c b/drivers/pci/pcie/pcie-dpc.c index c39f32e42b4d..2d976a623ddc 100644 --- a/drivers/pci/pcie/pcie-dpc.c +++ b/drivers/pci/pcie/pcie-dpc.c @@ -16,17 +16,62 @@ #include <linux/pcieport_if.h> #include "../pci.h" +struct rp_pio_header_log_regs { + u32 dw0; + u32 dw1; + u32 dw2; + u32 dw3; +}; + +struct dpc_rp_pio_regs { + u32 status; + u32 mask; + u32 severity; + u32 syserror; + u32 exception; + + struct rp_pio_header_log_regs header_log; + u32 impspec_log; + u32 tlp_prefix_log[4]; + u32 log_size; + u16 first_error; +}; + struct dpc_dev { struct pcie_device *dev; struct work_struct work; int cap_pos; bool rp; + u32 rp_pio_status; +}; + +static const char * const rp_pio_error_string[] = { + "Configuration Request received UR Completion", /* Bit Position 0 */ + "Configuration Request received CA Completion", /* Bit Position 1 */ + "Configuration Request Completion Timeout", /* Bit Position 2 */ + NULL, + NULL, + NULL, + NULL, + NULL, + "I/O Request received UR Completion", /* Bit Position 8 */ + "I/O Request received CA Completion", /* Bit Position 9 */ + "I/O Request Completion Timeout", /* Bit Position 10 */ + NULL, + NULL, + NULL, + NULL, + NULL, + "Memory Request received UR Completion", /* Bit Position 16 */ + "Memory Request received CA Completion", /* Bit Position 17 */ + "Memory Request Completion Timeout", /* Bit Position 18 */ }; static int dpc_wait_rp_inactive(struct dpc_dev *dpc) { unsigned long timeout = jiffies + HZ; struct pci_dev *pdev = dpc->dev->port; + struct device *dev = &dpc->dev->device; u16 status; pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS, &status); @@ -36,15 +81,17 @@ static int dpc_wait_rp_inactive(struct dpc_dev *dpc) pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS, &status); } if (status & PCI_EXP_DPC_RP_BUSY) { - dev_warn(&pdev->dev, "DPC root port still busy\n"); + dev_warn(dev, "DPC root port still busy\n"); return -EBUSY; } return 0; } -static void dpc_wait_link_inactive(struct pci_dev *pdev) +static void dpc_wait_link_inactive(struct dpc_dev *dpc) { unsigned long timeout = jiffies + HZ; + struct pci_dev *pdev = dpc->dev->port; + struct device *dev = &dpc->dev->device; u16 lnk_status; pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status); @@ -54,7 +101,7 @@ static void dpc_wait_link_inactive(struct pci_dev *pdev) pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status); } if (lnk_status & PCI_EXP_LNKSTA_DLLLA) - dev_warn(&pdev->dev, "Link state not disabled for DPC event\n"); + dev_warn(dev, "Link state not disabled for DPC event\n"); } static void interrupt_event_handler(struct work_struct *work) @@ -76,17 +123,132 @@ static void interrupt_event_handler(struct work_struct *work) } pci_unlock_rescan_remove(); - dpc_wait_link_inactive(pdev); + dpc_wait_link_inactive(dpc); if (dpc->rp && dpc_wait_rp_inactive(dpc)) return; + if (dpc->rp && dpc->rp_pio_status) { + pci_write_config_dword(pdev, + dpc->cap_pos + PCI_EXP_DPC_RP_PIO_STATUS, + dpc->rp_pio_status); + dpc->rp_pio_status = 0; + } + pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS, PCI_EXP_DPC_STATUS_TRIGGER | PCI_EXP_DPC_STATUS_INTERRUPT); } +static void dpc_rp_pio_print_tlp_header(struct device *dev, + struct rp_pio_header_log_regs *t) +{ + dev_err(dev, "TLP Header: %#010x %#010x %#010x %#010x\n", + t->dw0, t->dw1, t->dw2, t->dw3); +} + +static void dpc_rp_pio_print_error(struct dpc_dev *dpc, + struct dpc_rp_pio_regs *rp_pio) +{ + struct device *dev = &dpc->dev->device; + int i; + u32 status; + + dev_err(dev, "rp_pio_status: %#010x, rp_pio_mask: %#010x\n", + rp_pio->status, rp_pio->mask); + + dev_err(dev, "RP PIO severity=%#010x, syserror=%#010x, exception=%#010x\n", + rp_pio->severity, rp_pio->syserror, rp_pio->exception); + + status = (rp_pio->status & ~rp_pio->mask); + + for (i = 0; i < ARRAY_SIZE(rp_pio_error_string); i++) { + if (!(status & (1 << i))) + continue; + + dev_err(dev, "[%2d] %s%s\n", i, rp_pio_error_string[i], + rp_pio->first_error == i ? " (First)" : ""); + } + + dpc_rp_pio_print_tlp_header(dev, &rp_pio->header_log); + if (rp_pio->log_size == 4) + return; + dev_err(dev, "RP PIO ImpSpec Log %#010x\n", rp_pio->impspec_log); + + for (i = 0; i < rp_pio->log_size - 5; i++) + dev_err(dev, "TLP Prefix Header: dw%d, %#010x\n", i, + rp_pio->tlp_prefix_log[i]); +} + +static void dpc_rp_pio_get_info(struct dpc_dev *dpc, + struct dpc_rp_pio_regs *rp_pio) +{ + struct pci_dev *pdev = dpc->dev->port; + struct device *dev = &dpc->dev->device; + int i; + u16 cap; + u16 status; + + pci_read_config_dword(pdev, dpc->cap_pos + PCI_EXP_DPC_RP_PIO_STATUS, + &rp_pio->status); + pci_read_config_dword(pdev, dpc->cap_pos + PCI_EXP_DPC_RP_PIO_MASK, + &rp_pio->mask); + + pci_read_config_dword(pdev, dpc->cap_pos + PCI_EXP_DPC_RP_PIO_SEVERITY, + &rp_pio->severity); + pci_read_config_dword(pdev, dpc->cap_pos + PCI_EXP_DPC_RP_PIO_SYSERROR, + &rp_pio->syserror); + pci_read_config_dword(pdev, dpc->cap_pos + PCI_EXP_DPC_RP_PIO_EXCEPTION, + &rp_pio->exception); + + /* Get First Error Pointer */ + pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS, &status); + rp_pio->first_error = (status & 0x1f00) >> 8; + + pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &cap); + rp_pio->log_size = (cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8; + if (rp_pio->log_size < 4 || rp_pio->log_size > 9) { + dev_err(dev, "RP PIO log size %u is invalid\n", + rp_pio->log_size); + return; + } + + pci_read_config_dword(pdev, + dpc->cap_pos + PCI_EXP_DPC_RP_PIO_HEADER_LOG, + &rp_pio->header_log.dw0); + pci_read_config_dword(pdev, + dpc->cap_pos + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 4, + &rp_pio->header_log.dw1); + pci_read_config_dword(pdev, + dpc->cap_pos + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 8, + &rp_pio->header_log.dw2); + pci_read_config_dword(pdev, + dpc->cap_pos + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 12, + &rp_pio->header_log.dw3); + if (rp_pio->log_size == 4) + return; + + pci_read_config_dword(pdev, + dpc->cap_pos + PCI_EXP_DPC_RP_PIO_IMPSPEC_LOG, + &rp_pio->impspec_log); + for (i = 0; i < rp_pio->log_size - 5; i++) + pci_read_config_dword(pdev, + dpc->cap_pos + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG, + &rp_pio->tlp_prefix_log[i]); +} + +static void dpc_process_rp_pio_error(struct dpc_dev *dpc) +{ + struct dpc_rp_pio_regs rp_pio_regs; + + dpc_rp_pio_get_info(dpc, &rp_pio_regs); + dpc_rp_pio_print_error(dpc, &rp_pio_regs); + + dpc->rp_pio_status = rp_pio_regs.status; +} + static irqreturn_t dpc_irq(int irq, void *context) { struct dpc_dev *dpc = (struct dpc_dev *)context; struct pci_dev *pdev = dpc->dev->port; + struct device *dev = &dpc->dev->device; u16 status, source; pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS, &status); @@ -95,20 +257,24 @@ static irqreturn_t dpc_irq(int irq, void *context) if (!status || status == (u16)(~0)) return IRQ_NONE; - dev_info(&dpc->dev->device, "DPC containment event, status:%#06x source:%#06x\n", + dev_info(dev, "DPC containment event, status:%#06x source:%#06x\n", status, source); if (status & PCI_EXP_DPC_STATUS_TRIGGER) { u16 reason = (status >> 1) & 0x3; u16 ext_reason = (status >> 5) & 0x3; - dev_warn(&dpc->dev->device, "DPC %s detected, remove downstream devices\n", + dev_warn(dev, "DPC %s detected, remove downstream devices\n", (reason == 0) ? "unmasked uncorrectable error" : (reason == 1) ? "ERR_NONFATAL" : (reason == 2) ? "ERR_FATAL" : (ext_reason == 0) ? "RP PIO error" : (ext_reason == 1) ? "software trigger" : "reserved error"); + /* show RP PIO error detail information */ + if (reason == 3 && ext_reason == 0) + dpc_process_rp_pio_error(dpc); + schedule_work(&dpc->work); } return IRQ_HANDLED; @@ -119,10 +285,11 @@ static int dpc_probe(struct pcie_device *dev) { struct dpc_dev *dpc; struct pci_dev *pdev = dev->port; + struct device *device = &dev->device; int status; u16 ctl, cap; - dpc = devm_kzalloc(&dev->device, sizeof(*dpc), GFP_KERNEL); + dpc = devm_kzalloc(device, sizeof(*dpc), GFP_KERNEL); if (!dpc) return -ENOMEM; @@ -131,10 +298,10 @@ static int dpc_probe(struct pcie_device *dev) INIT_WORK(&dpc->work, interrupt_event_handler); set_service_data(dev, dpc); - status = devm_request_irq(&dev->device, dev->irq, dpc_irq, IRQF_SHARED, + status = devm_request_irq(device, dev->irq, dpc_irq, IRQF_SHARED, "pcie-dpc", dpc); if (status) { - dev_warn(&dev->device, "request IRQ%d failed: %d\n", dev->irq, + dev_warn(device, "request IRQ%d failed: %d\n", dev->irq, status); return status; } @@ -147,7 +314,7 @@ static int dpc_probe(struct pcie_device *dev) ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_NONFATAL | PCI_EXP_DPC_CTL_INT_EN; pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl); - dev_info(&dev->device, "DPC error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n", + dev_info(device, "DPC error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n", cap & 0xf, FLAG(cap, PCI_EXP_DPC_CAP_RP_EXT), FLAG(cap, PCI_EXP_DPC_CAP_POISONED_TLP), FLAG(cap, PCI_EXP_DPC_CAP_SW_TRIGGER), (cap >> 8) & 0xf, diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 8aa3f14bc87d..be635f017756 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -21,7 +21,6 @@ #include "../pci.h" #include "portdrv.h" -#include "aer/aerdrv.h" /* If this switch is set, PCIe port native services should not be enabled. */ bool pcie_ports_disabled; @@ -177,108 +176,20 @@ static void pcie_portdrv_remove(struct pci_dev *dev) pcie_port_device_remove(dev); } -static int error_detected_iter(struct device *device, void *data) -{ - struct pcie_device *pcie_device; - struct pcie_port_service_driver *driver; - struct aer_broadcast_data *result_data; - pci_ers_result_t status; - - result_data = (struct aer_broadcast_data *) data; - - if (device->bus == &pcie_port_bus_type && device->driver) { - driver = to_service_driver(device->driver); - if (!driver || - !driver->err_handler || - !driver->err_handler->error_detected) - return 0; - - pcie_device = to_pcie_device(device); - - /* Forward error detected message to service drivers */ - status = driver->err_handler->error_detected( - pcie_device->port, - result_data->state); - result_data->result = - merge_result(result_data->result, status); - } - - return 0; -} - static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev, enum pci_channel_state error) { - struct aer_broadcast_data data = {error, PCI_ERS_RESULT_CAN_RECOVER}; - - /* get true return value from &data */ - device_for_each_child(&dev->dev, &data, error_detected_iter); - return data.result; -} - -static int mmio_enabled_iter(struct device *device, void *data) -{ - struct pcie_device *pcie_device; - struct pcie_port_service_driver *driver; - pci_ers_result_t status, *result; - - result = (pci_ers_result_t *) data; - - if (device->bus == &pcie_port_bus_type && device->driver) { - driver = to_service_driver(device->driver); - if (driver && - driver->err_handler && - driver->err_handler->mmio_enabled) { - pcie_device = to_pcie_device(device); - - /* Forward error message to service drivers */ - status = driver->err_handler->mmio_enabled( - pcie_device->port); - *result = merge_result(*result, status); - } - } - - return 0; + /* Root Port has no impact. Always recovers. */ + return PCI_ERS_RESULT_CAN_RECOVER; } static pci_ers_result_t pcie_portdrv_mmio_enabled(struct pci_dev *dev) { - pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED; - - /* get true return value from &status */ - device_for_each_child(&dev->dev, &status, mmio_enabled_iter); - return status; -} - -static int slot_reset_iter(struct device *device, void *data) -{ - struct pcie_device *pcie_device; - struct pcie_port_service_driver *driver; - pci_ers_result_t status, *result; - - result = (pci_ers_result_t *) data; - - if (device->bus == &pcie_port_bus_type && device->driver) { - driver = to_service_driver(device->driver); - if (driver && - driver->err_handler && - driver->err_handler->slot_reset) { - pcie_device = to_pcie_device(device); - - /* Forward error message to service drivers */ - status = driver->err_handler->slot_reset( - pcie_device->port); - *result = merge_result(*result, status); - } - } - - return 0; + return PCI_ERS_RESULT_RECOVERED; } static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev) { - pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED; - /* If fatal, restore cfg space for possible link reset at upstream */ if (dev->error_state == pci_channel_io_frozen) { dev->state_saved = true; @@ -287,9 +198,7 @@ static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev) pci_enable_pcie_error_reporting(dev); } - /* get true return value from &status */ - device_for_each_child(&dev->dev, &status, slot_reset_iter); - return status; + return PCI_ERS_RESULT_RECOVERED; } static int resume_iter(struct device *device, void *data) @@ -299,13 +208,11 @@ static int resume_iter(struct device *device, void *data) if (device->bus == &pcie_port_bus_type && device->driver) { driver = to_service_driver(device->driver); - if (driver && - driver->err_handler && - driver->err_handler->resume) { + if (driver && driver->error_resume) { pcie_device = to_pcie_device(device); /* Forward error message to service drivers */ - driver->err_handler->resume(pcie_device->port); + driver->error_resume(pcie_device->port); } } diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index e6a917b4acd3..ff94b69738a8 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1745,21 +1745,50 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp) */ } -static void pci_configure_extended_tags(struct pci_dev *dev) +int pci_configure_extended_tags(struct pci_dev *dev, void *ign) { - u32 dev_cap; + struct pci_host_bridge *host; + u32 cap; + u16 ctl; int ret; if (!pci_is_pcie(dev)) - return; + return 0; - ret = pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &dev_cap); + ret = pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &cap); if (ret) - return; + return 0; + + if (!(cap & PCI_EXP_DEVCAP_EXT_TAG)) + return 0; + + ret = pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &ctl); + if (ret) + return 0; + + host = pci_find_host_bridge(dev->bus); + if (!host) + return 0; - if (dev_cap & PCI_EXP_DEVCAP_EXT_TAG) + /* + * If some device in the hierarchy doesn't handle Extended Tags + * correctly, make sure they're disabled. + */ + if (host->no_ext_tags) { + if (ctl & PCI_EXP_DEVCTL_EXT_TAG) { + dev_info(&dev->dev, "disabling Extended Tags\n"); + pcie_capability_clear_word(dev, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_EXT_TAG); + } + return 0; + } + + if (!(ctl & PCI_EXP_DEVCTL_EXT_TAG)) { + dev_info(&dev->dev, "enabling Extended Tags\n"); pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_EXT_TAG); + } + return 0; } /** @@ -1810,7 +1839,7 @@ static void pci_configure_device(struct pci_dev *dev) int ret; pci_configure_mps(dev); - pci_configure_extended_tags(dev); + pci_configure_extended_tags(dev, NULL); pci_configure_relaxed_ordering(dev); memset(&hpp, 0, sizeof(hpp)); @@ -1867,42 +1896,69 @@ struct pci_dev *pci_alloc_dev(struct pci_bus *bus) } EXPORT_SYMBOL(pci_alloc_dev); -bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l, - int crs_timeout) +static bool pci_bus_crs_vendor_id(u32 l) +{ + return (l & 0xffff) == 0x0001; +} + +static bool pci_bus_wait_crs(struct pci_bus *bus, int devfn, u32 *l, + int timeout) { int delay = 1; - if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l)) - return false; + if (!pci_bus_crs_vendor_id(*l)) + return true; /* not a CRS completion */ - /* some broken boards return 0 or ~0 if a slot is empty: */ - if (*l == 0xffffffff || *l == 0x00000000 || - *l == 0x0000ffff || *l == 0xffff0000) - return false; + if (!timeout) + return false; /* CRS, but caller doesn't want to wait */ /* - * Configuration Request Retry Status. Some root ports return the - * actual device ID instead of the synthetic ID (0xFFFF) required - * by the PCIe spec. Ignore the device ID and only check for - * (vendor id == 1). + * We got the reserved Vendor ID that indicates a completion with + * Configuration Request Retry Status (CRS). Retry until we get a + * valid Vendor ID or we time out. */ - while ((*l & 0xffff) == 0x0001) { - if (!crs_timeout) + while (pci_bus_crs_vendor_id(*l)) { + if (delay > timeout) { + pr_warn("pci %04x:%02x:%02x.%d: not ready after %dms; giving up\n", + pci_domain_nr(bus), bus->number, + PCI_SLOT(devfn), PCI_FUNC(devfn), delay - 1); + return false; + } + if (delay >= 1000) + pr_info("pci %04x:%02x:%02x.%d: not ready after %dms; waiting\n", + pci_domain_nr(bus), bus->number, + PCI_SLOT(devfn), PCI_FUNC(devfn), delay - 1); msleep(delay); delay *= 2; + if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l)) return false; - /* Card hasn't responded in 60 seconds? Must be stuck. */ - if (delay > crs_timeout) { - printk(KERN_WARNING "pci %04x:%02x:%02x.%d: not responding\n", - pci_domain_nr(bus), bus->number, PCI_SLOT(devfn), - PCI_FUNC(devfn)); - return false; - } } + if (delay >= 1000) + pr_info("pci %04x:%02x:%02x.%d: ready after %dms\n", + pci_domain_nr(bus), bus->number, + PCI_SLOT(devfn), PCI_FUNC(devfn), delay - 1); + + return true; +} + +bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l, + int timeout) +{ + if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l)) + return false; + + /* some broken boards return 0 or ~0 if a slot is empty: */ + if (*l == 0xffffffff || *l == 0x00000000 || + *l == 0x0000ffff || *l == 0xffff0000) + return false; + + if (pci_bus_crs_vendor_id(*l)) + return pci_bus_wait_crs(bus, devfn, l, timeout); + return true; } EXPORT_SYMBOL(pci_bus_read_dev_vendor_id); @@ -2331,6 +2387,15 @@ void pcie_bus_configure_settings(struct pci_bus *bus) } EXPORT_SYMBOL_GPL(pcie_bus_configure_settings); +/* + * Called after each bus is probed, but before its children are examined. This + * is marked as __weak because multiple architectures define it. + */ +void __weak pcibios_fixup_bus(struct pci_bus *bus) +{ + /* nothing to do, expected to be removed in the future */ +} + unsigned int pci_scan_child_bus(struct pci_bus *bus) { unsigned int devfn, pass, max = bus->busn_res.start; diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index a346487a9532..a2afb44fad10 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2062,7 +2062,7 @@ DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, /* * The 82575 and 82598 may experience data corruption issues when transitioning - * out of L0S. To prevent this we need to disable L0S on the pci-e link + * out of L0S. To prevent this we need to disable L0S on the PCIe link. */ static void quirk_disable_aspm_l0s(struct pci_dev *dev) { @@ -4227,6 +4227,18 @@ static int pci_quirk_cavium_acs(struct pci_dev *dev, u16 acs_flags) return acs_flags ? 0 : 1; } +static int pci_quirk_xgene_acs(struct pci_dev *dev, u16 acs_flags) +{ + /* + * X-Gene root matching this quirk do not allow peer-to-peer + * transactions with others, allowing masking out these bits as if they + * were unimplemented in the ACS capability. + */ + acs_flags &= ~(PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF); + + return acs_flags ? 0 : 1; +} + /* * Many Intel PCH root ports do provide ACS-like features to disable peer * transactions and validate bus numbers in requests, but do not provide an @@ -4475,6 +4487,8 @@ static const struct pci_dev_acs_enabled { { 0x10df, 0x720, pci_quirk_mf_endpoint_acs }, /* Emulex Skyhawk-R */ /* Cavium ThunderX */ { PCI_VENDOR_ID_CAVIUM, PCI_ANY_ID, pci_quirk_cavium_acs }, + /* APM X-Gene */ + { PCI_VENDOR_ID_AMCC, 0xE004, pci_quirk_xgene_acs }, { 0 } }; @@ -4747,23 +4761,6 @@ static void quirk_intel_qat_vf_cap(struct pci_dev *pdev) } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x443, quirk_intel_qat_vf_cap); -/* - * VMD-enabled root ports will change the source ID for all messages - * to the VMD device. Rather than doing device matching with the source - * ID, the AER driver should traverse the child device tree, reading - * AER registers to find the faulting device. - */ -static void quirk_no_aersid(struct pci_dev *pdev) -{ - /* VMD Domain */ - if (pdev->bus->sysdata && pci_domain_nr(pdev->bus) >= 0x10000) - pdev->bus->bus_flags |= PCI_BUS_FLAGS_NO_AERSID; -} -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2030, quirk_no_aersid); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2031, quirk_no_aersid); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2032, quirk_no_aersid); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid); - /* FLR may cause some 82579 devices to hang. */ static void quirk_intel_no_flr(struct pci_dev *dev) { @@ -4771,3 +4768,34 @@ static void quirk_intel_no_flr(struct pci_dev *dev) } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1502, quirk_intel_no_flr); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1503, quirk_intel_no_flr); + +static void quirk_no_ext_tags(struct pci_dev *pdev) +{ + struct pci_host_bridge *bridge = pci_find_host_bridge(pdev->bus); + + if (!bridge) + return; + + bridge->no_ext_tags = 1; + dev_info(&pdev->dev, "disabling Extended Tags (this device can't handle them)\n"); + + pci_walk_bus(bridge->bus, pci_configure_extended_tags, NULL); +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0140, quirk_no_ext_tags); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0142, quirk_no_ext_tags); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0144, quirk_no_ext_tags); + +#ifdef CONFIG_PCI_ATS +/* + * Some devices have a broken ATS implementation causing IOMMU stalls. + * Don't use ATS for those devices. + */ +static void quirk_no_ats(struct pci_dev *pdev) +{ + dev_info(&pdev->dev, "disabling ATS (broken on this device)\n"); + pdev->ats_cap = 0; +} + +/* AMD Stoney platform GPU */ +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x98e4, quirk_no_ats); +#endif /* CONFIG_PCI_ATS */ diff --git a/drivers/pci/setup-irq.c b/drivers/pci/setup-irq.c index 81eda3d93a5d..86106c44ce94 100644 --- a/drivers/pci/setup-irq.c +++ b/drivers/pci/setup-irq.c @@ -17,12 +17,6 @@ #include <linux/cache.h> #include "pci.h" -void __weak pcibios_update_irq(struct pci_dev *dev, int irq) -{ - dev_dbg(&dev->dev, "assigning IRQ %02d\n", irq); - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - void pci_assign_irq(struct pci_dev *dev) { u8 pin; @@ -65,29 +59,5 @@ void pci_assign_irq(struct pci_dev *dev) /* Always tell the device, so the driver knows what is the real IRQ to use; the device does not use it. */ - pcibios_update_irq(dev, irq); -} - -void pci_fixup_irqs(u8 (*swizzle)(struct pci_dev *, u8 *), - int (*map_irq)(const struct pci_dev *, u8, u8)) -{ - /* - * Implement pci_fixup_irqs() through pci_assign_irq(). - * This code should be remove eventually, it is a wrapper - * around pci_assign_irq() interface to keep current - * pci_fixup_irqs() behaviour unchanged on architecture - * code still relying on its interface. - */ - struct pci_dev *dev = NULL; - struct pci_host_bridge *hbrg = NULL; - - for_each_pci_dev(dev) { - hbrg = pci_find_host_bridge(dev->bus); - hbrg->swizzle_irq = swizzle; - hbrg->map_irq = map_irq; - pci_assign_irq(dev); - hbrg->swizzle_irq = NULL; - hbrg->map_irq = NULL; - } + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); } -EXPORT_SYMBOL_GPL(pci_fixup_irqs); diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 85774b7a316a..e576e1a8d978 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -234,6 +234,19 @@ static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev, return 0; } +/* + * We don't have to worry about legacy ISA devices, so nothing to do here. + * This is marked as __weak because multiple architectures define it; it should + * eventually go away. + */ +resource_size_t __weak pcibios_align_resource(void *data, + const struct resource *res, + resource_size_t size, + resource_size_t align) +{ + return res->start; +} + static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev, int resno, resource_size_t size, resource_size_t align) { diff --git a/drivers/phy/rockchip/phy-rockchip-pcie.c b/drivers/phy/rockchip/phy-rockchip-pcie.c index 6904633cad68..7cbdde029c0a 100644 --- a/drivers/phy/rockchip/phy-rockchip-pcie.c +++ b/drivers/phy/rockchip/phy-rockchip-pcie.c @@ -73,10 +73,38 @@ struct rockchip_pcie_data { struct rockchip_pcie_phy { struct rockchip_pcie_data *phy_data; struct regmap *reg_base; + struct phy_pcie_instance { + struct phy *phy; + u32 index; + } phys[PHY_MAX_LANE_NUM]; + struct mutex pcie_mutex; struct reset_control *phy_rst; struct clk *clk_pciephy_ref; + int pwr_cnt; + int init_cnt; }; +static struct rockchip_pcie_phy *to_pcie_phy(struct phy_pcie_instance *inst) +{ + return container_of(inst, struct rockchip_pcie_phy, + phys[inst->index]); +} + +static struct phy *rockchip_pcie_phy_of_xlate(struct device *dev, + struct of_phandle_args *args) +{ + struct rockchip_pcie_phy *rk_phy = dev_get_drvdata(dev); + + if (args->args_count == 0) + return rk_phy->phys[0].phy; + + if (WARN_ON(args->args[0] >= PHY_MAX_LANE_NUM)) + return ERR_PTR(-ENODEV); + + return rk_phy->phys[args->args[0]].phy; +} + + static inline void phy_wr_cfg(struct rockchip_pcie_phy *rk_phy, u32 addr, u32 data) { @@ -116,29 +144,59 @@ static inline u32 phy_rd_cfg(struct rockchip_pcie_phy *rk_phy, static int rockchip_pcie_phy_power_off(struct phy *phy) { - struct rockchip_pcie_phy *rk_phy = phy_get_drvdata(phy); + struct phy_pcie_instance *inst = phy_get_drvdata(phy); + struct rockchip_pcie_phy *rk_phy = to_pcie_phy(inst); int err = 0; + mutex_lock(&rk_phy->pcie_mutex); + + regmap_write(rk_phy->reg_base, + rk_phy->phy_data->pcie_laneoff, + HIWORD_UPDATE(PHY_LANE_IDLE_OFF, + PHY_LANE_IDLE_MASK, + PHY_LANE_IDLE_A_SHIFT + inst->index)); + + if (--rk_phy->pwr_cnt) + goto err_out; + err = reset_control_assert(rk_phy->phy_rst); if (err) { dev_err(&phy->dev, "assert phy_rst err %d\n", err); - return err; + goto err_restore; } +err_out: + mutex_unlock(&rk_phy->pcie_mutex); return 0; + +err_restore: + rk_phy->pwr_cnt++; + regmap_write(rk_phy->reg_base, + rk_phy->phy_data->pcie_laneoff, + HIWORD_UPDATE(!PHY_LANE_IDLE_OFF, + PHY_LANE_IDLE_MASK, + PHY_LANE_IDLE_A_SHIFT + inst->index)); + mutex_unlock(&rk_phy->pcie_mutex); + return err; } static int rockchip_pcie_phy_power_on(struct phy *phy) { - struct rockchip_pcie_phy *rk_phy = phy_get_drvdata(phy); + struct phy_pcie_instance *inst = phy_get_drvdata(phy); + struct rockchip_pcie_phy *rk_phy = to_pcie_phy(inst); int err = 0; u32 status; unsigned long timeout; + mutex_lock(&rk_phy->pcie_mutex); + + if (rk_phy->pwr_cnt++) + goto err_out; + err = reset_control_deassert(rk_phy->phy_rst); if (err) { dev_err(&phy->dev, "deassert phy_rst err %d\n", err); - return err; + goto err_pwr_cnt; } regmap_write(rk_phy->reg_base, rk_phy->phy_data->pcie_conf, @@ -146,6 +204,12 @@ static int rockchip_pcie_phy_power_on(struct phy *phy) PHY_CFG_ADDR_MASK, PHY_CFG_ADDR_SHIFT)); + regmap_write(rk_phy->reg_base, + rk_phy->phy_data->pcie_laneoff, + HIWORD_UPDATE(!PHY_LANE_IDLE_OFF, + PHY_LANE_IDLE_MASK, + PHY_LANE_IDLE_A_SHIFT + inst->index)); + /* * No documented timeout value for phy operation below, * so we make it large enough here. And we use loop-break @@ -214,18 +278,29 @@ static int rockchip_pcie_phy_power_on(struct phy *phy) goto err_pll_lock; } +err_out: + mutex_unlock(&rk_phy->pcie_mutex); return 0; err_pll_lock: reset_control_assert(rk_phy->phy_rst); +err_pwr_cnt: + rk_phy->pwr_cnt--; + mutex_unlock(&rk_phy->pcie_mutex); return err; } static int rockchip_pcie_phy_init(struct phy *phy) { - struct rockchip_pcie_phy *rk_phy = phy_get_drvdata(phy); + struct phy_pcie_instance *inst = phy_get_drvdata(phy); + struct rockchip_pcie_phy *rk_phy = to_pcie_phy(inst); int err = 0; + mutex_lock(&rk_phy->pcie_mutex); + + if (rk_phy->init_cnt++) + goto err_out; + err = clk_prepare_enable(rk_phy->clk_pciephy_ref); if (err) { dev_err(&phy->dev, "Fail to enable pcie ref clock.\n"); @@ -238,20 +313,33 @@ static int rockchip_pcie_phy_init(struct phy *phy) goto err_reset; } - return err; +err_out: + mutex_unlock(&rk_phy->pcie_mutex); + return 0; err_reset: + clk_disable_unprepare(rk_phy->clk_pciephy_ref); err_refclk: + rk_phy->init_cnt--; + mutex_unlock(&rk_phy->pcie_mutex); return err; } static int rockchip_pcie_phy_exit(struct phy *phy) { - struct rockchip_pcie_phy *rk_phy = phy_get_drvdata(phy); + struct phy_pcie_instance *inst = phy_get_drvdata(phy); + struct rockchip_pcie_phy *rk_phy = to_pcie_phy(inst); + + mutex_lock(&rk_phy->pcie_mutex); + + if (--rk_phy->init_cnt) + goto err_init_cnt; clk_disable_unprepare(rk_phy->clk_pciephy_ref); +err_init_cnt: + mutex_unlock(&rk_phy->pcie_mutex); return 0; } @@ -283,10 +371,11 @@ static int rockchip_pcie_phy_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct rockchip_pcie_phy *rk_phy; - struct phy *generic_phy; struct phy_provider *phy_provider; struct regmap *grf; const struct of_device_id *of_id; + int i; + u32 phy_num; grf = syscon_node_to_regmap(dev->parent->of_node); if (IS_ERR(grf)) { @@ -305,6 +394,8 @@ static int rockchip_pcie_phy_probe(struct platform_device *pdev) rk_phy->phy_data = (struct rockchip_pcie_data *)of_id->data; rk_phy->reg_base = grf; + mutex_init(&rk_phy->pcie_mutex); + rk_phy->phy_rst = devm_reset_control_get(dev, "phy"); if (IS_ERR(rk_phy->phy_rst)) { if (PTR_ERR(rk_phy->phy_rst) != -EPROBE_DEFER) @@ -319,14 +410,26 @@ static int rockchip_pcie_phy_probe(struct platform_device *pdev) return PTR_ERR(rk_phy->clk_pciephy_ref); } - generic_phy = devm_phy_create(dev, dev->of_node, &ops); - if (IS_ERR(generic_phy)) { - dev_err(dev, "failed to create PHY\n"); - return PTR_ERR(generic_phy); + /* parse #phy-cells to see if it's legacy PHY model */ + if (of_property_read_u32(dev->of_node, "#phy-cells", &phy_num)) + return -ENOENT; + + phy_num = (phy_num == 0) ? 1 : PHY_MAX_LANE_NUM; + dev_dbg(dev, "phy number is %d\n", phy_num); + + for (i = 0; i < phy_num; i++) { + rk_phy->phys[i].phy = devm_phy_create(dev, dev->of_node, &ops); + if (IS_ERR(rk_phy->phys[i].phy)) { + dev_err(dev, "failed to create PHY%d\n", i); + return PTR_ERR(rk_phy->phys[i].phy); + } + rk_phy->phys[i].index = i; + phy_set_drvdata(rk_phy->phys[i].phy, &rk_phy->phys[i]); } - phy_set_drvdata(generic_phy, rk_phy); - phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); + platform_set_drvdata(pdev, rk_phy); + phy_provider = devm_of_phy_provider_register(dev, + rockchip_pcie_phy_of_xlate); return PTR_ERR_OR_ZERO(phy_provider); } diff --git a/drivers/platform/x86/alienware-wmi.c b/drivers/platform/x86/alienware-wmi.c index 0831b428c217..4eb8e1a472b2 100644 --- a/drivers/platform/x86/alienware-wmi.c +++ b/drivers/platform/x86/alienware-wmi.c @@ -255,12 +255,13 @@ static int parse_rgb(const char *buf, struct platform_zone *zone) static struct platform_zone *match_zone(struct device_attribute *attr) { - int i; - for (i = 0; i < quirks->num_zones; i++) { - if ((struct device_attribute *)zone_data[i].attr == attr) { + u8 zone; + + for (zone = 0; zone < quirks->num_zones; zone++) { + if ((struct device_attribute *)zone_data[zone].attr == attr) { pr_debug("alienware-wmi: matched zone location: %d\n", - zone_data[i].location); - return &zone_data[i]; + zone_data[zone].location); + return &zone_data[zone]; } } return NULL; @@ -420,7 +421,7 @@ static DEVICE_ATTR(lighting_control_state, 0644, show_control_state, static int alienware_zone_init(struct platform_device *dev) { - int i; + u8 zone; char buffer[10]; char *name; @@ -457,19 +458,19 @@ static int alienware_zone_init(struct platform_device *dev) if (!zone_data) return -ENOMEM; - for (i = 0; i < quirks->num_zones; i++) { - sprintf(buffer, "zone%02X", i); + for (zone = 0; zone < quirks->num_zones; zone++) { + sprintf(buffer, "zone%02hhX", zone); name = kstrdup(buffer, GFP_KERNEL); if (name == NULL) return 1; - sysfs_attr_init(&zone_dev_attrs[i].attr); - zone_dev_attrs[i].attr.name = name; - zone_dev_attrs[i].attr.mode = 0644; - zone_dev_attrs[i].show = zone_show; - zone_dev_attrs[i].store = zone_set; - zone_data[i].location = i; - zone_attrs[i] = &zone_dev_attrs[i].attr; - zone_data[i].attr = &zone_dev_attrs[i]; + sysfs_attr_init(&zone_dev_attrs[zone].attr); + zone_dev_attrs[zone].attr.name = name; + zone_dev_attrs[zone].attr.mode = 0644; + zone_dev_attrs[zone].show = zone_show; + zone_dev_attrs[zone].store = zone_set; + zone_data[zone].location = zone; + zone_attrs[zone] = &zone_dev_attrs[zone].attr; + zone_data[zone].attr = &zone_dev_attrs[zone]; } zone_attrs[quirks->num_zones] = &dev_attr_lighting_control_state.attr; zone_attribute_group.attrs = zone_attrs; @@ -481,12 +482,13 @@ static int alienware_zone_init(struct platform_device *dev) static void alienware_zone_exit(struct platform_device *dev) { + u8 zone; + sysfs_remove_group(&dev->dev.kobj, &zone_attribute_group); led_classdev_unregister(&global_led); if (zone_dev_attrs) { - int i; - for (i = 0; i < quirks->num_zones; i++) - kfree(zone_dev_attrs[i].attr.name); + for (zone = 0; zone < quirks->num_zones; zone++) + kfree(zone_dev_attrs[zone].attr.name); } kfree(zone_dev_attrs); kfree(zone_data); diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 709e3a67391a..48e1541dc8d4 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -299,7 +299,7 @@ static int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1, union acpi_object *obj; u32 tmp = 0; - status = wmi_evaluate_method(ASUS_WMI_MGMT_GUID, 1, method_id, + status = wmi_evaluate_method(ASUS_WMI_MGMT_GUID, 0, method_id, &input, &output); if (ACPI_FAILURE(status)) @@ -1946,7 +1946,7 @@ static int show_call(struct seq_file *m, void *data) acpi_status status; status = wmi_evaluate_method(ASUS_WMI_MGMT_GUID, - 1, asus->debug.method_id, + 0, asus->debug.method_id, &input, &output); if (ACPI_FAILURE(status)) diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c index dad8f4afa17c..28d9f8696081 100644 --- a/drivers/platform/x86/dell-wmi.c +++ b/drivers/platform/x86/dell-wmi.c @@ -48,7 +48,6 @@ MODULE_LICENSE("GPL"); #define DELL_EVENT_GUID "9DBB5994-A997-11DA-B012-B622A1EF5492" #define DELL_DESCRIPTOR_GUID "8D9DDCBC-A997-11DA-B012-B622A1EF5492" -static u32 dell_wmi_interface_version; static bool wmi_requires_smbios_request; MODULE_ALIAS("wmi:"DELL_EVENT_GUID); @@ -56,6 +55,7 @@ MODULE_ALIAS("wmi:"DELL_DESCRIPTOR_GUID); struct dell_wmi_priv { struct input_dev *input_dev; + u32 interface_version; }; static int __init dmi_matched(const struct dmi_system_id *dmi) @@ -348,6 +348,7 @@ static void dell_wmi_process_key(struct wmi_device *wdev, int type, int code) static void dell_wmi_notify(struct wmi_device *wdev, union acpi_object *obj) { + struct dell_wmi_priv *priv = dev_get_drvdata(&wdev->dev); u16 *buffer_entry, *buffer_end; acpi_size buffer_size; int len, i; @@ -376,7 +377,7 @@ static void dell_wmi_notify(struct wmi_device *wdev, * So to prevent reading garbage from buffer we will process only first * one event on devices with WMI interface version 0. */ - if (dell_wmi_interface_version == 0 && buffer_entry < buffer_end) + if (priv->interface_version == 0 && buffer_entry < buffer_end) if (buffer_end > buffer_entry + buffer_entry[0] + 1) buffer_end = buffer_entry + buffer_entry[0] + 1; @@ -626,61 +627,67 @@ static void dell_wmi_input_destroy(struct wmi_device *wdev) * WMI Interface Version 8 4 <version> * WMI buffer length 12 4 4096 */ -static int dell_wmi_check_descriptor_buffer(void) +static int dell_wmi_check_descriptor_buffer(struct wmi_device *wdev) { - struct acpi_buffer out = { ACPI_ALLOCATE_BUFFER, NULL }; - union acpi_object *obj; - acpi_status status; + struct dell_wmi_priv *priv = dev_get_drvdata(&wdev->dev); + union acpi_object *obj = NULL; + struct wmi_device *desc_dev; u32 *buffer; + int ret; - status = wmi_query_block(DELL_DESCRIPTOR_GUID, 0, &out); - if (ACPI_FAILURE(status)) { - pr_err("Cannot read Dell descriptor buffer - %d\n", status); - return status; + desc_dev = wmidev_get_other_guid(wdev, DELL_DESCRIPTOR_GUID); + if (!desc_dev) { + dev_err(&wdev->dev, "Dell WMI descriptor does not exist\n"); + return -ENODEV; } - obj = (union acpi_object *)out.pointer; + obj = wmidev_block_query(desc_dev, 0); if (!obj) { - pr_err("Dell descriptor buffer is empty\n"); - return -EINVAL; + dev_err(&wdev->dev, "failed to read Dell WMI descriptor\n"); + ret = -EIO; + goto out; } if (obj->type != ACPI_TYPE_BUFFER) { - pr_err("Cannot read Dell descriptor buffer\n"); - kfree(obj); - return -EINVAL; + dev_err(&wdev->dev, "Dell descriptor has wrong type\n"); + ret = -EINVAL; + goto out; } if (obj->buffer.length != 128) { - pr_err("Dell descriptor buffer has invalid length (%d)\n", + dev_err(&wdev->dev, + "Dell descriptor buffer has invalid length (%d)\n", obj->buffer.length); if (obj->buffer.length < 16) { - kfree(obj); - return -EINVAL; + ret = -EINVAL; + goto out; } } buffer = (u32 *)obj->buffer.pointer; if (buffer[0] != 0x4C4C4544 && buffer[1] != 0x494D5720) - pr_warn("Dell descriptor buffer has invalid signature (%*ph)\n", + dev_warn(&wdev->dev, "Dell descriptor buffer has invalid signature (%*ph)\n", 8, buffer); if (buffer[2] != 0 && buffer[2] != 1) - pr_warn("Dell descriptor buffer has unknown version (%d)\n", + dev_warn(&wdev->dev, "Dell descriptor buffer has unknown version (%d)\n", buffer[2]); if (buffer[3] != 4096) - pr_warn("Dell descriptor buffer has invalid buffer length (%d)\n", + dev_warn(&wdev->dev, "Dell descriptor buffer has invalid buffer length (%d)\n", buffer[3]); - dell_wmi_interface_version = buffer[2]; + priv->interface_version = buffer[2]; + ret = 0; - pr_info("Detected Dell WMI interface version %u\n", - dell_wmi_interface_version); + dev_info(&wdev->dev, "Detected Dell WMI interface version %u\n", + priv->interface_version); +out: kfree(obj); - return 0; + put_device(&desc_dev->dev); + return ret; } /* @@ -717,17 +724,19 @@ static int dell_wmi_events_set_enabled(bool enable) static int dell_wmi_probe(struct wmi_device *wdev) { + struct dell_wmi_priv *priv; int err; - struct dell_wmi_priv *priv = devm_kzalloc( + priv = devm_kzalloc( &wdev->dev, sizeof(struct dell_wmi_priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + dev_set_drvdata(&wdev->dev, priv); - err = dell_wmi_check_descriptor_buffer(); + err = dell_wmi_check_descriptor_buffer(wdev); if (err) return err; - dev_set_drvdata(&wdev->dev, priv); - return dell_wmi_input_setup(wdev); } diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 0df4209648d1..b4ed3dc983d5 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -107,13 +107,6 @@ enum hp_wmi_hardware_mask { HPWMI_TABLET_MASK = 0x04, }; -#define BIOS_ARGS_INIT(write, ctype, size) \ - (struct bios_args) { .signature = 0x55434553, \ - .command = (write) ? 0x2 : 0x1, \ - .commandtype = (ctype), \ - .datasize = (size), \ - .data = 0 } - struct bios_return { u32 sigpass; u32 return_code; @@ -188,6 +181,22 @@ struct rfkill2_device { static int rfkill2_count; static struct rfkill2_device rfkill2[HPWMI_MAX_RFKILL2_DEVICES]; +/* map output size to the corresponding WMI method id */ +static inline int encode_outsize_for_pvsz(int outsize) +{ + if (outsize > 4096) + return -EINVAL; + if (outsize > 1024) + return 5; + if (outsize > 128) + return 4; + if (outsize > 4) + return 3; + if (outsize > 0) + return 2; + return 1; +} + /* * hp_wmi_perform_query * @@ -211,6 +220,7 @@ static struct rfkill2_device rfkill2[HPWMI_MAX_RFKILL2_DEVICES]; static int hp_wmi_perform_query(int query, enum hp_wmi_command command, void *buffer, int insize, int outsize) { + int mid; struct bios_return *bios_return; int actual_outsize; union acpi_object *obj; @@ -225,11 +235,15 @@ static int hp_wmi_perform_query(int query, enum hp_wmi_command command, struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL }; int ret = 0; + mid = encode_outsize_for_pvsz(outsize); + if (WARN_ON(mid < 0)) + return mid; + if (WARN_ON(insize > sizeof(args.data))) return -EINVAL; memcpy(&args.data, buffer, insize); - wmi_evaluate_method(HPWMI_BIOS_GUID, 0, 0x3, &input, &output); + wmi_evaluate_method(HPWMI_BIOS_GUID, 0, mid, &input, &output); obj = output.pointer; diff --git a/drivers/platform/x86/ibm_rtl.c b/drivers/platform/x86/ibm_rtl.c index c62e5e11ca4b..610ac8391caa 100644 --- a/drivers/platform/x86/ibm_rtl.c +++ b/drivers/platform/x86/ibm_rtl.c @@ -103,7 +103,7 @@ static void rtl_port_unmap(void __iomem *addr) static int ibm_rtl_write(u8 value) { int ret = 0, count = 0; - static u32 cmd_port_val; + u32 cmd_port_val; RTL_DEBUG("%s(%d)\n", __func__, value); diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 603fc6050971..fe98d4ac0df3 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -42,6 +42,8 @@ #define IDEAPAD_RFKILL_DEV_NUM (3) +#define BM_CONSERVATION_BIT (5) + #define CFG_BT_BIT (16) #define CFG_3G_BIT (17) #define CFG_WIFI_BIT (18) @@ -55,6 +57,11 @@ static const char *const ideapad_wmi_fnesc_events[] = { #endif enum { + BMCMD_CONSERVATION_ON = 3, + BMCMD_CONSERVATION_OFF = 5, +}; + +enum { VPCCMD_R_VPC1 = 0x10, VPCCMD_R_BL_MAX, VPCCMD_R_BL, @@ -123,6 +130,23 @@ static int read_method_int(acpi_handle handle, const char *method, int *val) } } +static int method_gbmd(acpi_handle handle, unsigned long *ret) +{ + int result, val; + + result = read_method_int(handle, "GBMD", &val); + *ret = val; + return result; +} + +static int method_sbmc(acpi_handle handle, int cmd) +{ + acpi_status status; + + status = acpi_execute_simple_method(handle, "SBMC", cmd); + return ACPI_FAILURE(status) ? -1 : 0; +} + static int method_vpcr(acpi_handle handle, int cmd, int *ret) { acpi_status status; @@ -250,6 +274,13 @@ static int debugfs_status_show(struct seq_file *s, void *data) if (!read_ec_data(priv->adev->handle, VPCCMD_R_CAMERA, &value)) seq_printf(s, "Camera status:\t%s(%lu)\n", value ? "On" : "Off", value); + seq_puts(s, "=====================\n"); + + if (!method_gbmd(priv->adev->handle, &value)) { + seq_printf(s, "Conservation mode:\t%s(%lu)\n", + test_bit(BM_CONSERVATION_BIT, &value) ? "On" : "Off", + value); + } return 0; } @@ -456,10 +487,45 @@ static ssize_t __maybe_unused touchpad_store(struct device *dev, static DEVICE_ATTR_RO(touchpad); +static ssize_t conservation_mode_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ideapad_private *priv = dev_get_drvdata(dev); + unsigned long result; + + if (method_gbmd(priv->adev->handle, &result)) + return sprintf(buf, "-1\n"); + return sprintf(buf, "%u\n", test_bit(BM_CONSERVATION_BIT, &result)); +} + +static ssize_t conservation_mode_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ideapad_private *priv = dev_get_drvdata(dev); + bool state; + int ret; + + ret = kstrtobool(buf, &state); + if (ret) + return ret; + + ret = method_sbmc(priv->adev->handle, state ? + BMCMD_CONSERVATION_ON : + BMCMD_CONSERVATION_OFF); + if (ret < 0) + return -EIO; + return count; +} + +static DEVICE_ATTR_RW(conservation_mode); + static struct attribute *ideapad_attributes[] = { &dev_attr_camera_power.attr, &dev_attr_fan_mode.attr, &dev_attr_touchpad.attr, + &dev_attr_conservation_mode.attr, NULL }; @@ -477,6 +543,9 @@ static umode_t ideapad_is_visible(struct kobject *kobj, unsigned long value; supported = !read_ec_data(priv->adev->handle, VPCCMD_R_FAN, &value); + } else if (attr == &dev_attr_conservation_mode.attr) { + supported = acpi_has_method(priv->adev->handle, "GBMD") && + acpi_has_method(priv->adev->handle, "SBMC"); } else supported = true; diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c index a782c78e7c63..e34fd70b67af 100644 --- a/drivers/platform/x86/intel-hid.c +++ b/drivers/platform/x86/intel-hid.c @@ -230,7 +230,7 @@ wakeup: if (event != 0xc0) { if (!priv->array || !sparse_keymap_report_event(priv->array, event, 1, true)) - dev_info(&device->dev, "unknown event 0x%x\n", event); + dev_dbg(&device->dev, "unknown event 0x%x\n", event); return; } @@ -241,7 +241,7 @@ wakeup: } if (!sparse_keymap_report_event(priv->input_dev, ev_index, 1, true)) - dev_info(&device->dev, "unknown event index 0x%llx\n", + dev_dbg(&device->dev, "unknown event index 0x%llx\n", ev_index); } diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c index 480926786cb8..58c5ff36523a 100644 --- a/drivers/platform/x86/intel-vbtn.c +++ b/drivers/platform/x86/intel-vbtn.c @@ -83,7 +83,7 @@ static void notify_handler(acpi_handle handle, u32 event, void *context) } else if (sparse_keymap_report_event(priv->input_dev, event, 1, true)) { return; } - dev_info(&device->dev, "unknown event index 0x%x\n", event); + dev_dbg(&device->dev, "unknown event index 0x%x\n", event); } static int intel_vbtn_probe(struct platform_device *device) diff --git a/drivers/platform/x86/intel_mid_powerbtn.c b/drivers/platform/x86/intel_mid_powerbtn.c index 871cfa682519..d79fbf924b13 100644 --- a/drivers/platform/x86/intel_mid_powerbtn.c +++ b/drivers/platform/x86/intel_mid_powerbtn.c @@ -108,13 +108,13 @@ static irqreturn_t mid_pb_isr(int irq, void *dev_id) return IRQ_HANDLED; } -static struct mid_pb_ddata mfld_ddata = { +static const struct mid_pb_ddata mfld_ddata = { .mirqlvl1_addr = INTEL_MSIC_IRQLVL1MSK, .pbstat_addr = INTEL_MSIC_PBSTATUS, .pbstat_mask = MSIC_PB_LEVEL, }; -static struct mid_pb_ddata mrfld_ddata = { +static const struct mid_pb_ddata mrfld_ddata = { .mirqlvl1_addr = BCOVE_IRQLVL1MSK, .pbstat_addr = BCOVE_PBSTATUS, .pbstat_mask = BCOVE_PB_LEVEL, @@ -142,8 +142,10 @@ static int mid_pb_probe(struct platform_device *pdev) if (!id) return -ENODEV; - if (irq < 0) - return -EINVAL; + if (irq < 0) { + dev_err(&pdev->dev, "Failed to get IRQ: %d\n", irq); + return irq; + } input = devm_input_allocate_device(&pdev->dev); if (!input) diff --git a/drivers/platform/x86/intel_pmc_core.c b/drivers/platform/x86/intel_pmc_core.c index 914bcd2edbde..17e08b42b0a9 100644 --- a/drivers/platform/x86/intel_pmc_core.c +++ b/drivers/platform/x86/intel_pmc_core.c @@ -110,6 +110,13 @@ static const struct pmc_reg_map spt_reg_map = { .pfear_sts = spt_pfear_map, .mphy_sts = spt_mphy_map, .pll_sts = spt_pll_map, + .slp_s0_offset = SPT_PMC_SLP_S0_RES_COUNTER_OFFSET, + .ltr_ignore_offset = SPT_PMC_LTR_IGNORE_OFFSET, + .regmap_length = SPT_PMC_MMIO_REG_LEN, + .ppfear0_offset = SPT_PMC_XRAM_PPFEAR0A, + .ppfear_buckets = SPT_PPFEAR_NUM_ENTRIES, + .pm_cfg_offset = SPT_PMC_PM_CFG_OFFSET, + .pm_read_disable_bit = SPT_PMC_READ_DISABLE_BIT, }; static const struct pci_device_id pmc_pci_ids[] = { @@ -157,12 +164,13 @@ static inline u32 pmc_core_adjust_slp_s0_step(u32 value) int intel_pmc_slp_s0_counter_read(u32 *data) { struct pmc_dev *pmcdev = &pmc; + const struct pmc_reg_map *map = pmcdev->map; u32 value; if (!pmcdev->has_slp_s0_res) return -EACCES; - value = pmc_core_reg_read(pmcdev, SPT_PMC_SLP_S0_RES_COUNTER_OFFSET); + value = pmc_core_reg_read(pmcdev, map->slp_s0_offset); *data = pmc_core_adjust_slp_s0_step(value); return 0; @@ -172,9 +180,10 @@ EXPORT_SYMBOL_GPL(intel_pmc_slp_s0_counter_read); static int pmc_core_dev_state_get(void *data, u64 *val) { struct pmc_dev *pmcdev = data; + const struct pmc_reg_map *map = pmcdev->map; u32 value; - value = pmc_core_reg_read(pmcdev, SPT_PMC_SLP_S0_RES_COUNTER_OFFSET); + value = pmc_core_reg_read(pmcdev, map->slp_s0_offset); *val = pmc_core_adjust_slp_s0_step(value); return 0; @@ -187,8 +196,8 @@ static int pmc_core_check_read_lock_bit(void) struct pmc_dev *pmcdev = &pmc; u32 value; - value = pmc_core_reg_read(pmcdev, SPT_PMC_PM_CFG_OFFSET); - return value & BIT(SPT_PMC_READ_DISABLE_BIT); + value = pmc_core_reg_read(pmcdev, pmcdev->map->pm_cfg_offset); + return value & BIT(pmcdev->map->pm_read_disable_bit); } #if IS_ENABLED(CONFIG_DEBUG_FS) @@ -204,12 +213,13 @@ static int pmc_core_ppfear_sts_show(struct seq_file *s, void *unused) { struct pmc_dev *pmcdev = s->private; const struct pmc_bit_map *map = pmcdev->map->pfear_sts; - u8 pf_regs[NUM_ENTRIES]; + u8 pf_regs[PPFEAR_MAX_NUM_ENTRIES]; int index, iter; - iter = SPT_PMC_XRAM_PPFEAR0A; + iter = pmcdev->map->ppfear0_offset; - for (index = 0; index < NUM_ENTRIES; index++, iter++) + for (index = 0; index < pmcdev->map->ppfear_buckets && + index < PPFEAR_MAX_NUM_ENTRIES; index++, iter++) pf_regs[index] = pmc_core_reg_read_byte(pmcdev, iter); for (index = 0; map[index].name; index++) @@ -376,6 +386,7 @@ static ssize_t pmc_core_ltr_ignore_write(struct file *file, const char __user *userbuf, size_t count, loff_t *ppos) { struct pmc_dev *pmcdev = &pmc; + const struct pmc_reg_map *map = pmcdev->map; u32 val, buf_size, fd; int err = 0; @@ -392,9 +403,9 @@ static ssize_t pmc_core_ltr_ignore_write(struct file *file, const char __user goto out_unlock; } - fd = pmc_core_reg_read(pmcdev, SPT_PMC_LTR_IGNORE_OFFSET); + fd = pmc_core_reg_read(pmcdev, map->ltr_ignore_offset); fd |= (1U << val); - pmc_core_reg_write(pmcdev, SPT_PMC_LTR_IGNORE_OFFSET, fd); + pmc_core_reg_write(pmcdev, map->ltr_ignore_offset, fd); out_unlock: mutex_unlock(&pmcdev->lock); @@ -530,8 +541,8 @@ static int pmc_core_probe(struct pci_dev *dev, const struct pci_device_id *id) } mutex_init(&pmcdev->lock); - pmcdev->pmc_xram_read_bit = pmc_core_check_read_lock_bit(); pmcdev->map = map; + pmcdev->pmc_xram_read_bit = pmc_core_check_read_lock_bit(); err = pmc_core_dbgfs_register(pmcdev); if (err < 0) diff --git a/drivers/platform/x86/intel_pmc_core.h b/drivers/platform/x86/intel_pmc_core.h index 5a48e7728479..3d225a9cc09f 100644 --- a/drivers/platform/x86/intel_pmc_core.h +++ b/drivers/platform/x86/intel_pmc_core.h @@ -38,7 +38,8 @@ #define SPT_PMC_SLP_S0_RES_COUNTER_STEP 0x64 #define PMC_BASE_ADDR_MASK ~(SPT_PMC_MMIO_REG_LEN - 1) #define MTPMC_MASK 0xffff0000 -#define NUM_ENTRIES 5 +#define PPFEAR_MAX_NUM_ENTRIES 5 +#define SPT_PPFEAR_NUM_ENTRIES 5 #define SPT_PMC_READ_DISABLE_BIT 0x16 #define SPT_PMC_MSG_FULL_STS_BIT 0x18 #define NUM_RETRIES 100 @@ -126,10 +127,37 @@ struct pmc_bit_map { u32 bit_mask; }; +/** + * struct pmc_reg_map - Structure used to define parameter unique to a + PCH family + * @pfear_sts: Maps name of IP block to PPFEAR* bit + * @mphy_sts: Maps name of MPHY lane to MPHY status lane status bit + * @pll_sts: Maps name of PLL to corresponding bit status + * @slp_s0_offset: PWRMBASE offset to read SLP_S0 residency + * @ltr_ignore_offset: PWRMBASE offset to read/write LTR ignore bit + * @base_address: Base address of PWRMBASE defined in BIOS writer guide + * @regmap_length: Length of memory to map from PWRMBASE address to access + * @ppfear0_offset: PWRMBASE offset to to read PPFEAR* + * @ppfear_buckets: Number of 8 bits blocks to read all IP blocks from + * PPFEAR + * @pm_cfg_offset: PWRMBASE offset to PM_CFG register + * @pm_read_disable_bit: Bit index to read PMC_READ_DISABLE + * + * Each PCH has unique set of register offsets and bit indexes. This structure + * captures them to have a common implementation. + */ struct pmc_reg_map { const struct pmc_bit_map *pfear_sts; const struct pmc_bit_map *mphy_sts; const struct pmc_bit_map *pll_sts; + const u32 slp_s0_offset; + const u32 ltr_ignore_offset; + const u32 base_address; + const int regmap_length; + const u32 ppfear0_offset; + const int ppfear_buckets; + const u32 pm_cfg_offset; + const int pm_read_disable_bit; }; /** diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c index f7cf981502cd..2c85f75e32b0 100644 --- a/drivers/platform/x86/intel_scu_ipc.c +++ b/drivers/platform/x86/intel_scu_ipc.c @@ -72,20 +72,20 @@ struct intel_scu_ipc_pdata_t { u8 irq_mode; }; -static struct intel_scu_ipc_pdata_t intel_scu_ipc_lincroft_pdata = { +static const struct intel_scu_ipc_pdata_t intel_scu_ipc_lincroft_pdata = { .i2c_base = 0xff12b000, .i2c_len = 0x10, .irq_mode = 0, }; /* Penwell and Cloverview */ -static struct intel_scu_ipc_pdata_t intel_scu_ipc_penwell_pdata = { +static const struct intel_scu_ipc_pdata_t intel_scu_ipc_penwell_pdata = { .i2c_base = 0xff12b000, .i2c_len = 0x10, .irq_mode = 1, }; -static struct intel_scu_ipc_pdata_t intel_scu_ipc_tangier_pdata = { +static const struct intel_scu_ipc_pdata_t intel_scu_ipc_tangier_pdata = { .i2c_base = 0xff00d000, .i2c_len = 0x10, .irq_mode = 0, diff --git a/drivers/platform/x86/intel_telemetry_debugfs.c b/drivers/platform/x86/intel_telemetry_debugfs.c index cd21df982abd..d4fc42b4cbeb 100644 --- a/drivers/platform/x86/intel_telemetry_debugfs.c +++ b/drivers/platform/x86/intel_telemetry_debugfs.c @@ -331,6 +331,7 @@ static struct telemetry_debugfs_conf telem_apl_debugfs_conf = { static const struct x86_cpu_id telemetry_debugfs_cpu_ids[] = { TELEM_DEBUGFS_CPU(INTEL_FAM6_ATOM_GOLDMONT, telem_apl_debugfs_conf), + TELEM_DEBUGFS_CPU(INTEL_FAM6_ATOM_GEMINI_LAKE, telem_apl_debugfs_conf), {} }; diff --git a/drivers/platform/x86/intel_telemetry_pltdrv.c b/drivers/platform/x86/intel_telemetry_pltdrv.c index 6ebdbd2b04fc..e0424d5a795a 100644 --- a/drivers/platform/x86/intel_telemetry_pltdrv.c +++ b/drivers/platform/x86/intel_telemetry_pltdrv.c @@ -46,7 +46,6 @@ #define TELEM_SAMPLING_DEFAULT_PERIOD 0xD #define TELEM_MAX_EVENTS_SRAM 28 -#define TELEM_MAX_OS_ALLOCATED_EVENTS 20 #define TELEM_SSRAM_STARTTIME_OFFSET 8 #define TELEM_SSRAM_EVTLOG_OFFSET 16 @@ -153,6 +152,30 @@ static struct telemetry_evtmap {"PC2_AND_MEM_SHALLOW_IDLE_RES", 0x1D40}, }; +static struct telemetry_evtmap + telemetry_glk_pss_default_events[TELEM_MAX_OS_ALLOCATED_EVENTS] = { + {"IA_CORE0_C6_RES", 0x0400}, + {"IA_CORE0_C6_CTR", 0x0000}, + {"IA_MODULE0_C7_RES", 0x0410}, + {"IA_MODULE0_C7_CTR", 0x000C}, + {"IA_C0_RES", 0x0805}, + {"PCS_LTR", 0x2801}, + {"PSTATES", 0x2802}, + {"SOC_S0I3_RES", 0x0407}, + {"SOC_S0I3_CTR", 0x0008}, + {"PCS_S0I3_CTR", 0x0007}, + {"PCS_C1E_RES", 0x0414}, + {"PCS_IDLE_STATUS", 0x2806}, + {"IA_PERF_LIMITS", 0x280B}, + {"GT_PERF_LIMITS", 0x280C}, + {"PCS_WAKEUP_S0IX_CTR", 0x0025}, + {"PCS_IDLE_BLOCKED", 0x2C00}, + {"PCS_S0IX_BLOCKED", 0x2C01}, + {"PCS_S0IX_WAKE_REASONS", 0x2C02}, + {"PCS_LTR_BLOCKING", 0x2C03}, + {"PC2_AND_MEM_SHALLOW_IDLE_RES", 0x1D40}, +}; + /* APL specific Data */ static struct telemetry_plt_config telem_apl_config = { .pss_config = { @@ -163,8 +186,19 @@ static struct telemetry_plt_config telem_apl_config = { }, }; +/* GLK specific Data */ +static struct telemetry_plt_config telem_glk_config = { + .pss_config = { + .telem_evts = telemetry_glk_pss_default_events, + }, + .ioss_config = { + .telem_evts = telemetry_apl_ioss_default_events, + }, +}; + static const struct x86_cpu_id telemetry_cpu_ids[] = { TELEM_CPU(INTEL_FAM6_ATOM_GOLDMONT, telem_apl_config), + TELEM_CPU(INTEL_FAM6_ATOM_GEMINI_LAKE, telem_glk_config), {} }; diff --git a/drivers/platform/x86/msi-wmi.c b/drivers/platform/x86/msi-wmi.c index f6209b739ec0..620138236c89 100644 --- a/drivers/platform/x86/msi-wmi.c +++ b/drivers/platform/x86/msi-wmi.c @@ -184,7 +184,7 @@ static const struct backlight_ops msi_backlight_ops = { static void msi_wmi_notify(u32 value, void *context) { struct acpi_buffer response = { ACPI_ALLOCATE_BUFFER, NULL }; - static struct key_entry *key; + struct key_entry *key; union acpi_object *obj; acpi_status status; diff --git a/drivers/platform/x86/mxm-wmi.c b/drivers/platform/x86/mxm-wmi.c index f4bad83053a9..35d8b9a939f9 100644 --- a/drivers/platform/x86/mxm-wmi.c +++ b/drivers/platform/x86/mxm-wmi.c @@ -53,7 +53,7 @@ int mxm_wmi_call_mxds(int adapter) printk("calling mux switch %d\n", adapter); - status = wmi_evaluate_method(MXM_WMMX_GUID, 0x1, adapter, &input, + status = wmi_evaluate_method(MXM_WMMX_GUID, 0x0, adapter, &input, &output); if (ACPI_FAILURE(status)) @@ -78,7 +78,7 @@ int mxm_wmi_call_mxmx(int adapter) printk("calling mux switch %d\n", adapter); - status = wmi_evaluate_method(MXM_WMMX_GUID, 0x1, adapter, &input, + status = wmi_evaluate_method(MXM_WMMX_GUID, 0x0, adapter, &input, &output); if (ACPI_FAILURE(status)) diff --git a/drivers/platform/x86/peaq-wmi.c b/drivers/platform/x86/peaq-wmi.c index 77d1f90b0794..bc98ef95514a 100644 --- a/drivers/platform/x86/peaq-wmi.c +++ b/drivers/platform/x86/peaq-wmi.c @@ -39,7 +39,7 @@ static void peaq_wmi_poll(struct input_polled_dev *dev) struct acpi_buffer input = { sizeof(dummy), &dummy }; struct acpi_buffer output = { sizeof(obj), &obj }; - status = wmi_evaluate_method(PEAQ_DOLBY_BUTTON_GUID, 1, + status = wmi_evaluate_method(PEAQ_DOLBY_BUTTON_GUID, 0, PEAQ_DOLBY_BUTTON_METHOD_ID, &input, &output); if (ACPI_FAILURE(status)) @@ -51,7 +51,7 @@ static void peaq_wmi_poll(struct input_polled_dev *dev) return; } - if (peaq_ignore_events_counter && --peaq_ignore_events_counter >= 0) + if (peaq_ignore_events_counter && peaq_ignore_events_counter--) return; if (obj.integer.value) { diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index b22573131e53..2242d6035d9e 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -24,7 +24,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define TPACPI_VERSION "0.25" -#define TPACPI_SYSFS_VERSION 0x020700 +#define TPACPI_SYSFS_VERSION 0x030000 /* * Changelog: @@ -6342,7 +6342,7 @@ static int __init thermal_init(struct ibm_init_struct *iibm) switch (thermal_read_mode) { case TPACPI_THERMAL_TPEC_16: - res = sysfs_create_group(&tpacpi_sensors_pdev->dev.kobj, + res = sysfs_create_group(&tpacpi_hwmon->kobj, &thermal_temp_input16_group); if (res) return res; @@ -6350,7 +6350,7 @@ static int __init thermal_init(struct ibm_init_struct *iibm) case TPACPI_THERMAL_TPEC_8: case TPACPI_THERMAL_ACPI_TMP07: case TPACPI_THERMAL_ACPI_UPDT: - res = sysfs_create_group(&tpacpi_sensors_pdev->dev.kobj, + res = sysfs_create_group(&tpacpi_hwmon->kobj, &thermal_temp_input8_group); if (res) return res; @@ -6367,13 +6367,13 @@ static void thermal_exit(void) { switch (thermal_read_mode) { case TPACPI_THERMAL_TPEC_16: - sysfs_remove_group(&tpacpi_sensors_pdev->dev.kobj, + sysfs_remove_group(&tpacpi_hwmon->kobj, &thermal_temp_input16_group); break; case TPACPI_THERMAL_TPEC_8: case TPACPI_THERMAL_ACPI_TMP07: case TPACPI_THERMAL_ACPI_UPDT: - sysfs_remove_group(&tpacpi_sensors_pdev->dev.kobj, + sysfs_remove_group(&tpacpi_hwmon->kobj, &thermal_temp_input8_group); break; case TPACPI_THERMAL_NONE: @@ -8696,7 +8696,7 @@ static int __init fan_init(struct ibm_init_struct *iibm) fan_attributes[ARRAY_SIZE(fan_attributes)-2] = &dev_attr_fan2_input.attr; } - rc = sysfs_create_group(&tpacpi_sensors_pdev->dev.kobj, + rc = sysfs_create_group(&tpacpi_hwmon->kobj, &fan_attr_group); if (rc < 0) return rc; @@ -8704,7 +8704,7 @@ static int __init fan_init(struct ibm_init_struct *iibm) rc = driver_create_file(&tpacpi_hwmon_pdriver.driver, &driver_attr_fan_watchdog); if (rc < 0) { - sysfs_remove_group(&tpacpi_sensors_pdev->dev.kobj, + sysfs_remove_group(&tpacpi_hwmon->kobj, &fan_attr_group); return rc; } @@ -8719,7 +8719,7 @@ static void fan_exit(void) "cancelling any pending fan watchdog tasks\n"); /* FIXME: can we really do this unconditionally? */ - sysfs_remove_group(&tpacpi_sensors_pdev->dev.kobj, &fan_attr_group); + sysfs_remove_group(&tpacpi_hwmon->kobj, &fan_attr_group); driver_remove_file(&tpacpi_hwmon_pdriver.driver, &driver_attr_fan_watchdog); @@ -9149,16 +9149,6 @@ static void hotkey_driver_event(const unsigned int scancode) tpacpi_driver_event(TP_HKEY_EV_HOTKEY_BASE + scancode); } -/* sysfs name ---------------------------------------------------------- */ -static ssize_t thinkpad_acpi_pdev_name_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%s\n", TPACPI_NAME); -} - -static DEVICE_ATTR(name, S_IRUGO, thinkpad_acpi_pdev_name_show, NULL); - /* --------------------------------------------------------------------- */ /* /proc support */ @@ -9696,8 +9686,6 @@ static void thinkpad_acpi_module_exit(void) if (tpacpi_hwmon) hwmon_device_unregister(tpacpi_hwmon); - if (tp_features.sensors_pdev_attrs_registered) - device_remove_file(&tpacpi_sensors_pdev->dev, &dev_attr_name); if (tpacpi_sensors_pdev) platform_device_unregister(tpacpi_sensors_pdev); if (tpacpi_pdev) @@ -9818,14 +9806,10 @@ static int __init thinkpad_acpi_module_init(void) thinkpad_acpi_module_exit(); return ret; } - ret = device_create_file(&tpacpi_sensors_pdev->dev, &dev_attr_name); - if (ret) { - pr_err("unable to create sysfs hwmon device attributes\n"); - thinkpad_acpi_module_exit(); - return ret; - } tp_features.sensors_pdev_attrs_registered = 1; - tpacpi_hwmon = hwmon_device_register(&tpacpi_sensors_pdev->dev); + tpacpi_hwmon = hwmon_device_register_with_groups( + &tpacpi_sensors_pdev->dev, TPACPI_NAME, NULL, NULL); + if (IS_ERR(tpacpi_hwmon)) { ret = PTR_ERR(tpacpi_hwmon); tpacpi_hwmon = NULL; diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index e32ba575e8d9..0765b1797d4c 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -218,7 +218,7 @@ u32 method_id, const struct acpi_buffer *in, struct acpi_buffer *out) if (!(block->flags & ACPI_WMI_METHOD)) return AE_BAD_DATA; - if (block->instance_count < instance) + if (block->instance_count <= instance) return AE_BAD_PARAMETER; input.count = 2; @@ -265,7 +265,7 @@ static acpi_status __query_block(struct wmi_block *wblock, u8 instance, block = &wblock->gblock; handle = wblock->acpi_device->handle; - if (block->instance_count < instance) + if (block->instance_count <= instance) return AE_BAD_PARAMETER; /* Check GUID is a data block */ @@ -392,7 +392,7 @@ acpi_status wmi_set_block(const char *guid_string, u8 instance, block = &wblock->gblock; handle = wblock->acpi_device->handle; - if (block->instance_count < instance) + if (block->instance_count <= instance) return AE_BAD_PARAMETER; /* Check GUID is a data block */ diff --git a/include/linux/aer.h b/include/linux/aer.h index 04602cbe85dc..43799bd17a02 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -39,7 +39,7 @@ struct aer_capability_regs { }; #if defined(CONFIG_PCIEAER) -/* pci-e port driver needs this function to enable aer */ +/* PCIe port driver needs this function to enable AER */ int pci_enable_pcie_error_reporting(struct pci_dev *dev); int pci_disable_pcie_error_reporting(struct pci_dev *dev); int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev); @@ -67,7 +67,6 @@ void cper_print_aer(struct pci_dev *dev, int aer_severity, struct aer_capability_regs *aer); int cper_severity_to_aer(int cper_severity); void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, - int severity, - struct aer_capability_regs *aer_regs); + int severity, struct aer_capability_regs *aer_regs); #endif //_AER_H_ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 6383115e9d2c..2e028854bac7 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -307,7 +307,7 @@ DECLARE_PER_CPU(int, disable_stack_tracer); static inline void stack_tracer_disable(void) { /* Preemption or interupts must be disabled */ - if (IS_ENABLED(CONFIG_PREEMPT_DEBUG)) + if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) WARN_ON_ONCE(!preempt_count() || !irqs_disabled()); this_cpu_inc(disable_stack_tracer); } @@ -320,7 +320,7 @@ static inline void stack_tracer_disable(void) */ static inline void stack_tracer_enable(void) { - if (IS_ENABLED(CONFIG_PREEMPT_DEBUG)) + if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) WARN_ON_ONCE(!preempt_count() || !irqs_disabled()); this_cpu_dec(disable_stack_tracer); } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 21a6fd6c44af..6882538eda32 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -720,7 +720,7 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); int kvm_vcpu_yield_to(struct kvm_vcpu *target); -void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); +void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible); void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); @@ -800,6 +800,7 @@ int kvm_arch_hardware_setup(void); void kvm_arch_hardware_unsetup(void); void kvm_arch_check_processor_compat(void *rtn); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); #ifndef __KVM_HAVE_ARCH_VM_ALLOC @@ -985,6 +986,12 @@ static inline hpa_t pfn_to_hpa(kvm_pfn_t pfn) return (hpa_t)pfn << PAGE_SHIFT; } +static inline struct page *kvm_vcpu_gpa_to_page(struct kvm_vcpu *vcpu, + gpa_t gpa) +{ + return kvm_vcpu_gfn_to_page(vcpu, gpa_to_gfn(gpa)); +} + static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa) { unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index af5edbf3eea3..f7a04e1af112 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -62,11 +62,13 @@ struct pci_epc_ops { * @size: the size of the PCI address space * @bitmap: bitmap to manage the PCI address space * @pages: number of bits representing the address region + * @page_size: size of each page */ struct pci_epc_mem { phys_addr_t phys_base; size_t size; unsigned long *bitmap; + size_t page_size; int pages; }; @@ -98,6 +100,9 @@ struct pci_epc { #define devm_pci_epc_create(dev, ops) \ __devm_pci_epc_create((dev), (ops), THIS_MODULE) +#define pci_epc_mem_init(epc, phys_addr, size) \ + __pci_epc_mem_init((epc), (phys_addr), (size), PAGE_SIZE) + static inline void epc_set_drvdata(struct pci_epc *epc, void *data) { dev_set_drvdata(&epc->dev, data); @@ -135,7 +140,8 @@ void pci_epc_stop(struct pci_epc *epc); struct pci_epc *pci_epc_get(const char *epc_name); void pci_epc_put(struct pci_epc *epc); -int pci_epc_mem_init(struct pci_epc *epc, phys_addr_t phys_addr, size_t size); +int __pci_epc_mem_init(struct pci_epc *epc, phys_addr_t phys_addr, size_t size, + size_t page_size); void pci_epc_mem_exit(struct pci_epc *epc); void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc, phys_addr_t *phys_addr, size_t size); diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 0d529cb90143..60d551a9a1ba 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -14,17 +14,10 @@ #include <linux/device.h> #include <linux/mod_devicetable.h> +#include <linux/pci.h> struct pci_epf; -enum pci_interrupt_pin { - PCI_INTERRUPT_UNKNOWN, - PCI_INTERRUPT_INTA, - PCI_INTERRUPT_INTB, - PCI_INTERRUPT_INTC, - PCI_INTERRUPT_INTD, -}; - enum pci_barno { BAR_0, BAR_1, @@ -149,6 +142,8 @@ static inline void *epf_get_drvdata(struct pci_epf *epf) return dev_get_drvdata(&epf->dev); } +const struct pci_epf_device_id * +pci_epf_match_device(const struct pci_epf_device_id *id, struct pci_epf *epf); struct pci_epf *pci_epf_create(const char *name); void pci_epf_destroy(struct pci_epf *epf); int __pci_epf_register_driver(struct pci_epf_driver *driver, diff --git a/include/linux/pci.h b/include/linux/pci.h index da05e5db06ac..f68c58a93dd0 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -102,6 +102,28 @@ enum { DEVICE_COUNT_RESOURCE = PCI_NUM_RESOURCES, }; +/** + * enum pci_interrupt_pin - PCI INTx interrupt values + * @PCI_INTERRUPT_UNKNOWN: Unknown or unassigned interrupt + * @PCI_INTERRUPT_INTA: PCI INTA pin + * @PCI_INTERRUPT_INTB: PCI INTB pin + * @PCI_INTERRUPT_INTC: PCI INTC pin + * @PCI_INTERRUPT_INTD: PCI INTD pin + * + * Corresponds to values for legacy PCI INTx interrupts, as can be found in the + * PCI_INTERRUPT_PIN register. + */ +enum pci_interrupt_pin { + PCI_INTERRUPT_UNKNOWN, + PCI_INTERRUPT_INTA, + PCI_INTERRUPT_INTB, + PCI_INTERRUPT_INTC, + PCI_INTERRUPT_INTD, +}; + +/* The number of legacy PCI INTx interrupts */ +#define PCI_NUM_INTX 4 + /* * pci_power_t values must match the bits in the Capabilities PME_Support * and Control/Status PowerState fields in the Power Management capability. @@ -453,6 +475,7 @@ struct pci_host_bridge { void *release_data; struct msi_controller *msi; unsigned int ignore_reset_delay:1; /* for entire hierarchy */ + unsigned int no_ext_tags:1; /* no Extended Tags */ /* Resource alignment requirements */ resource_size_t (*align_resource)(struct pci_dev *dev, const struct resource *res, @@ -847,7 +870,6 @@ char *pcibios_setup(char *str); resource_size_t pcibios_align_resource(void *, const struct resource *, resource_size_t, resource_size_t); -void pcibios_update_irq(struct pci_dev *, int irq); /* Weak but can be overriden by arch */ void pci_fixup_cardbus(struct pci_bus *); @@ -1165,8 +1187,6 @@ void pci_assign_unassigned_bus_resources(struct pci_bus *bus); void pci_assign_unassigned_root_bus_resources(struct pci_bus *bus); void pdev_enable_device(struct pci_dev *); int pci_enable_resources(struct pci_dev *, int mask); -void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *), - int (*)(const struct pci_dev *, u8, u8)); void pci_assign_irq(struct pci_dev *dev); struct resource *pci_find_resource(struct pci_dev *dev, struct resource *res); #define HAVE_PCI_REQ_REGIONS 2 @@ -1399,6 +1419,38 @@ pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, NULL); } +/** + * pci_irqd_intx_xlate() - Translate PCI INTx value to an IRQ domain hwirq + * @d: the INTx IRQ domain + * @node: the DT node for the device whose interrupt we're translating + * @intspec: the interrupt specifier data from the DT + * @intsize: the number of entries in @intspec + * @out_hwirq: pointer at which to write the hwirq number + * @out_type: pointer at which to write the interrupt type + * + * Translate a PCI INTx interrupt number from device tree in the range 1-4, as + * stored in the standard PCI_INTERRUPT_PIN register, to a value in the range + * 0-3 suitable for use in a 4 entry IRQ domain. That is, subtract one from the + * INTx value to obtain the hwirq number. + * + * Returns 0 on success, or -EINVAL if the interrupt specifier is out of range. + */ +static inline int pci_irqd_intx_xlate(struct irq_domain *d, + struct device_node *node, + const u32 *intspec, + unsigned int intsize, + unsigned long *out_hwirq, + unsigned int *out_type) +{ + const u32 intx = intspec[0]; + + if (intx < PCI_INTERRUPT_INTA || intx > PCI_INTERRUPT_INTD) + return -EINVAL; + + *out_hwirq = intx - PCI_INTERRUPT_INTA; + return 0; +} + #ifdef CONFIG_PCIEPORTBUS extern bool pcie_ports_disabled; extern bool pcie_ports_auto; @@ -2064,7 +2116,7 @@ static inline u16 pci_vpd_lrdt_tag(const u8 *lrdt) /** * pci_vpd_srdt_size - Extracts the Small Resource Data Type length - * @lrdt: Pointer to the beginning of the Small Resource Data Type tag + * @srdt: Pointer to the beginning of the Small Resource Data Type tag * * Returns the extracted Small Resource Data Type length. */ @@ -2075,7 +2127,7 @@ static inline u8 pci_vpd_srdt_size(const u8 *srdt) /** * pci_vpd_srdt_tag - Extracts the Small Resource Data Type Tag Item - * @lrdt: Pointer to the beginning of the Small Resource Data Type tag + * @srdt: Pointer to the beginning of the Small Resource Data Type tag * * Returns the extracted Small Resource Data Type Tag Item. */ diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h index afcd130ab3a9..e8f3f7479224 100644 --- a/include/linux/pcieport_if.h +++ b/include/linux/pcieport_if.h @@ -38,7 +38,7 @@ static inline void set_service_data(struct pcie_device *dev, void *data) dev->priv_data = data; } -static inline void* get_service_data(struct pcie_device *dev) +static inline void *get_service_data(struct pcie_device *dev) { return dev->priv_data; } @@ -50,8 +50,8 @@ struct pcie_port_service_driver { int (*suspend) (struct pcie_device *dev); int (*resume) (struct pcie_device *dev); - /* Service Error Recovery Handler */ - const struct pci_error_handlers *err_handler; + /* Device driver may resume normal operations */ + void (*error_resume)(struct pci_dev *dev); /* Link Reset Capability - AER service driver specific */ pci_ers_result_t (*reset_link) (struct pci_dev *dev); diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 5012b524283d..7f11050746ae 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -217,7 +217,6 @@ enum { TRACE_EVENT_FL_CAP_ANY_BIT, TRACE_EVENT_FL_NO_SET_FILTER_BIT, TRACE_EVENT_FL_IGNORE_ENABLE_BIT, - TRACE_EVENT_FL_WAS_ENABLED_BIT, TRACE_EVENT_FL_TRACEPOINT_BIT, TRACE_EVENT_FL_KPROBE_BIT, TRACE_EVENT_FL_UPROBE_BIT, @@ -229,9 +228,6 @@ enum { * CAP_ANY - Any user can enable for perf * NO_SET_FILTER - Set when filter has error and is to be ignored * IGNORE_ENABLE - For trace internal events, do not enable with debugfs file - * WAS_ENABLED - Set and stays set when an event was ever enabled - * (used for module unloading, if a module event is enabled, - * it is best to clear the buffers that used it). * TRACEPOINT - Event is a tracepoint * KPROBE - Event is a kprobe * UPROBE - Event is a uprobe @@ -241,7 +237,6 @@ enum { TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT), TRACE_EVENT_FL_NO_SET_FILTER = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT), TRACE_EVENT_FL_IGNORE_ENABLE = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT), - TRACE_EVENT_FL_WAS_ENABLED = (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT), TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT), TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT), TRACE_EVENT_FL_UPROBE = (1 << TRACE_EVENT_FL_UPROBE_BIT), @@ -306,6 +301,7 @@ enum { EVENT_FILE_FL_TRIGGER_MODE_BIT, EVENT_FILE_FL_TRIGGER_COND_BIT, EVENT_FILE_FL_PID_FILTER_BIT, + EVENT_FILE_FL_WAS_ENABLED_BIT, }; /* @@ -321,6 +317,7 @@ enum { * TRIGGER_MODE - When set, invoke the triggers associated with the event * TRIGGER_COND - When set, one or more triggers has an associated filter * PID_FILTER - When set, the event is filtered based on pid + * WAS_ENABLED - Set when enabled to know to clear trace on module removal */ enum { EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT), @@ -333,6 +330,7 @@ enum { EVENT_FILE_FL_TRIGGER_MODE = (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT), EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT), EVENT_FILE_FL_PID_FILTER = (1 << EVENT_FILE_FL_PID_FILTER_BIT), + EVENT_FILE_FL_WAS_ENABLED = (1 << EVENT_FILE_FL_WAS_ENABLED_BIT), }; struct trace_event_file { diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6cd63c18708a..838887587411 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -711,7 +711,8 @@ struct kvm_ppc_one_seg_page_size { struct kvm_ppc_smmu_info { __u64 flags; __u32 slb_size; - __u32 pad; + __u16 data_keys; /* # storage keys supported for data */ + __u16 instr_keys; /* # storage keys supported for instructions */ struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; }; diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index c22d3ebaca20..f8d58045926f 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -513,6 +513,7 @@ #define PCI_EXP_DEVSTA_URD 0x0008 /* Unsupported Request Detected */ #define PCI_EXP_DEVSTA_AUXPD 0x0010 /* AUX Power Detected */ #define PCI_EXP_DEVSTA_TRPND 0x0020 /* Transactions Pending */ +#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V1 12 /* v1 endpoints without link end here */ #define PCI_EXP_LNKCAP 12 /* Link Capabilities */ #define PCI_EXP_LNKCAP_SLS 0x0000000f /* Supported Link Speeds */ #define PCI_EXP_LNKCAP_SLS_2_5GB 0x00000001 /* LNKCAP2 SLS Vector bit 0 */ @@ -556,7 +557,7 @@ #define PCI_EXP_LNKSTA_DLLLA 0x2000 /* Data Link Layer Link Active */ #define PCI_EXP_LNKSTA_LBMS 0x4000 /* Link Bandwidth Management Status */ #define PCI_EXP_LNKSTA_LABS 0x8000 /* Link Autonomous Bandwidth Status */ -#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V1 20 /* v1 endpoints end here */ +#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V1 20 /* v1 endpoints with link end here */ #define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ #define PCI_EXP_SLTCAP_ABP 0x00000001 /* Attention Button Present */ #define PCI_EXP_SLTCAP_PCP 0x00000002 /* Power Controller Present */ @@ -639,7 +640,7 @@ #define PCI_EXP_DEVCTL2_OBFF_MSGB_EN 0x4000 /* Enable OBFF Message type B */ #define PCI_EXP_DEVCTL2_OBFF_WAKE_EN 0x6000 /* OBFF using WAKE# signaling */ #define PCI_EXP_DEVSTA2 42 /* Device Status 2 */ -#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 44 /* v2 endpoints end here */ +#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2 44 /* v2 endpoints without link end here */ #define PCI_EXP_LNKCAP2 44 /* Link Capabilities 2 */ #define PCI_EXP_LNKCAP2_SLS_2_5GB 0x00000002 /* Supported Speed 2.5GT/s */ #define PCI_EXP_LNKCAP2_SLS_5_0GB 0x00000004 /* Supported Speed 5.0GT/s */ @@ -647,6 +648,7 @@ #define PCI_EXP_LNKCAP2_CROSSLINK 0x00000100 /* Crosslink supported */ #define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ #define PCI_EXP_LNKSTA2 50 /* Link Status 2 */ +#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52 /* v2 endpoints with link end here */ #define PCI_EXP_SLTCAP2 52 /* Slot Capabilities 2 */ #define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ #define PCI_EXP_SLTSTA2 58 /* Slot Status 2 */ @@ -733,23 +735,17 @@ #define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */ #define PCI_ERR_HEADER_LOG 28 /* Header Log Register (16 bytes) */ #define PCI_ERR_ROOT_COMMAND 44 /* Root Error Command */ -/* Correctable Err Reporting Enable */ -#define PCI_ERR_ROOT_CMD_COR_EN 0x00000001 -/* Non-fatal Err Reporting Enable */ -#define PCI_ERR_ROOT_CMD_NONFATAL_EN 0x00000002 -/* Fatal Err Reporting Enable */ -#define PCI_ERR_ROOT_CMD_FATAL_EN 0x00000004 +#define PCI_ERR_ROOT_CMD_COR_EN 0x00000001 /* Correctable Err Reporting Enable */ +#define PCI_ERR_ROOT_CMD_NONFATAL_EN 0x00000002 /* Non-Fatal Err Reporting Enable */ +#define PCI_ERR_ROOT_CMD_FATAL_EN 0x00000004 /* Fatal Err Reporting Enable */ #define PCI_ERR_ROOT_STATUS 48 -#define PCI_ERR_ROOT_COR_RCV 0x00000001 /* ERR_COR Received */ -/* Multi ERR_COR Received */ -#define PCI_ERR_ROOT_MULTI_COR_RCV 0x00000002 -/* ERR_FATAL/NONFATAL Received */ -#define PCI_ERR_ROOT_UNCOR_RCV 0x00000004 -/* Multi ERR_FATAL/NONFATAL Received */ -#define PCI_ERR_ROOT_MULTI_UNCOR_RCV 0x00000008 -#define PCI_ERR_ROOT_FIRST_FATAL 0x00000010 /* First Fatal */ -#define PCI_ERR_ROOT_NONFATAL_RCV 0x00000020 /* Non-Fatal Received */ -#define PCI_ERR_ROOT_FATAL_RCV 0x00000040 /* Fatal Received */ +#define PCI_ERR_ROOT_COR_RCV 0x00000001 /* ERR_COR Received */ +#define PCI_ERR_ROOT_MULTI_COR_RCV 0x00000002 /* Multiple ERR_COR */ +#define PCI_ERR_ROOT_UNCOR_RCV 0x00000004 /* ERR_FATAL/NONFATAL */ +#define PCI_ERR_ROOT_MULTI_UNCOR_RCV 0x00000008 /* Multiple FATAL/NONFATAL */ +#define PCI_ERR_ROOT_FIRST_FATAL 0x00000010 /* First UNC is Fatal */ +#define PCI_ERR_ROOT_NONFATAL_RCV 0x00000020 /* Non-Fatal Received */ +#define PCI_ERR_ROOT_FATAL_RCV 0x00000040 /* Fatal Received */ #define PCI_ERR_ROOT_ERR_SRC 52 /* Error Source Identification */ /* Virtual Channel */ @@ -967,6 +963,7 @@ #define PCI_EXP_DPC_CAP_RP_EXT 0x20 /* Root Port Extensions for DPC */ #define PCI_EXP_DPC_CAP_POISONED_TLP 0x40 /* Poisoned TLP Egress Blocking Supported */ #define PCI_EXP_DPC_CAP_SW_TRIGGER 0x80 /* Software Triggering Supported */ +#define PCI_EXP_DPC_RP_PIO_LOG_SIZE 0xF00 /* RP PIO log size */ #define PCI_EXP_DPC_CAP_DL_ACTIVE 0x1000 /* ERR_COR signal on DL_Active supported */ #define PCI_EXP_DPC_CTL 6 /* DPC control */ @@ -980,6 +977,15 @@ #define PCI_EXP_DPC_SOURCE_ID 10 /* DPC Source Identifier */ +#define PCI_EXP_DPC_RP_PIO_STATUS 0x0C /* RP PIO Status */ +#define PCI_EXP_DPC_RP_PIO_MASK 0x10 /* RP PIO MASK */ +#define PCI_EXP_DPC_RP_PIO_SEVERITY 0x14 /* RP PIO Severity */ +#define PCI_EXP_DPC_RP_PIO_SYSERROR 0x18 /* RP PIO SysError */ +#define PCI_EXP_DPC_RP_PIO_EXCEPTION 0x1C /* RP PIO Exception */ +#define PCI_EXP_DPC_RP_PIO_HEADER_LOG 0x20 /* RP PIO Header Log */ +#define PCI_EXP_DPC_RP_PIO_IMPSPEC_LOG 0x30 /* RP PIO ImpSpec Log */ +#define PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG 0x34 /* RP PIO TLP Prefix Log */ + /* Precision Time Measurement */ #define PCI_PTM_CAP 0x04 /* PTM Capability */ #define PCI_PTM_CAP_REQ 0x00000001 /* Requester capable */ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 96cea88fa00f..6abfafd7f173 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2828,13 +2828,14 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) if (!command || !ftrace_enabled) { /* - * If these are per_cpu ops, they still need their - * per_cpu field freed. Since, function tracing is + * If these are dynamic or per_cpu ops, they still + * need their data freed. Since, function tracing is * not currently active, we can just free them * without synchronizing all CPUs. */ - if (ops->flags & FTRACE_OPS_FL_PER_CPU) - per_cpu_ops_free(ops); + if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU)) + goto free_ops; + return 0; } @@ -2900,6 +2901,7 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) if (IS_ENABLED(CONFIG_PREEMPT)) synchronize_rcu_tasks(); + free_ops: arch_ftrace_trampoline_free(ops); if (ops->flags & FTRACE_OPS_FL_PER_CPU) @@ -5690,10 +5692,51 @@ static int referenced_filters(struct dyn_ftrace *rec) return cnt; } +static void +clear_mod_from_hash(struct ftrace_page *pg, struct ftrace_hash *hash) +{ + struct ftrace_func_entry *entry; + struct dyn_ftrace *rec; + int i; + + if (ftrace_hash_empty(hash)) + return; + + for (i = 0; i < pg->index; i++) { + rec = &pg->records[i]; + entry = __ftrace_lookup_ip(hash, rec->ip); + /* + * Do not allow this rec to match again. + * Yeah, it may waste some memory, but will be removed + * if/when the hash is modified again. + */ + if (entry) + entry->ip = 0; + } +} + +/* Clear any records from hashs */ +static void clear_mod_from_hashes(struct ftrace_page *pg) +{ + struct trace_array *tr; + + mutex_lock(&trace_types_lock); + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (!tr->ops || !tr->ops->func_hash) + continue; + mutex_lock(&tr->ops->func_hash->regex_lock); + clear_mod_from_hash(pg, tr->ops->func_hash->filter_hash); + clear_mod_from_hash(pg, tr->ops->func_hash->notrace_hash); + mutex_unlock(&tr->ops->func_hash->regex_lock); + } + mutex_unlock(&trace_types_lock); +} + void ftrace_release_mod(struct module *mod) { struct dyn_ftrace *rec; struct ftrace_page **last_pg; + struct ftrace_page *tmp_page = NULL; struct ftrace_page *pg; int order; @@ -5723,14 +5766,25 @@ void ftrace_release_mod(struct module *mod) ftrace_update_tot_cnt -= pg->index; *last_pg = pg->next; - order = get_count_order(pg->size / ENTRIES_PER_PAGE); - free_pages((unsigned long)pg->records, order); - kfree(pg); + + pg->next = tmp_page; + tmp_page = pg; } else last_pg = &pg->next; } out_unlock: mutex_unlock(&ftrace_lock); + + for (pg = tmp_page; pg; pg = tmp_page) { + + /* Needs to be called outside of ftrace_lock */ + clear_mod_from_hashes(pg); + + order = get_count_order(pg->size / ENTRIES_PER_PAGE); + free_pages((unsigned long)pg->records, order); + tmp_page = pg->next; + kfree(pg); + } } void ftrace_module_enable(struct module *mod) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 44004d8aa3b3..5360b7aec57a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1702,6 +1702,9 @@ void tracing_reset_all_online_cpus(void) struct trace_array *tr; list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (!tr->clear_trace) + continue; + tr->clear_trace = false; tracing_reset_online_cpus(&tr->trace_buffer); #ifdef CONFIG_TRACER_MAX_TRACE tracing_reset_online_cpus(&tr->max_buffer); @@ -2799,11 +2802,17 @@ static char *get_trace_buf(void) if (!buffer || buffer->nesting >= 4) return NULL; - return &buffer->buffer[buffer->nesting++][0]; + buffer->nesting++; + + /* Interrupts must see nesting incremented before we use the buffer */ + barrier(); + return &buffer->buffer[buffer->nesting][0]; } static void put_trace_buf(void) { + /* Don't let the decrement of nesting leak before this */ + barrier(); this_cpu_dec(trace_percpu_buffer->nesting); } @@ -6220,7 +6229,7 @@ static int tracing_set_clock(struct trace_array *tr, const char *clockstr) tracing_reset_online_cpus(&tr->trace_buffer); #ifdef CONFIG_TRACER_MAX_TRACE - if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer) + if (tr->max_buffer.buffer) ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func); tracing_reset_online_cpus(&tr->max_buffer); #endif diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 490ba229931d..fb5d54d0d1b3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -245,6 +245,7 @@ struct trace_array { int stop_count; int clock_id; int nr_topts; + bool clear_trace; struct tracer *current_trace; unsigned int trace_flags; unsigned char trace_flags_index[TRACE_FLAGS_MAX_SIZE]; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 36132f9280e6..87468398b9ed 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -406,7 +406,7 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file, if (file->flags & EVENT_FILE_FL_RECORDED_TGID) { tracing_stop_tgid_record(); - clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); + clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); } call->class->reg(call, TRACE_REG_UNREGISTER, file); @@ -466,7 +466,7 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file, set_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags); /* WAS_ENABLED gets set but never cleared. */ - call->flags |= TRACE_EVENT_FL_WAS_ENABLED; + set_bit(EVENT_FILE_FL_WAS_ENABLED_BIT, &file->flags); } break; } @@ -2058,6 +2058,10 @@ static void event_remove(struct trace_event_call *call) do_for_each_event_file(tr, file) { if (file->event_call != call) continue; + + if (file->flags & EVENT_FILE_FL_WAS_ENABLED) + tr->clear_trace = true; + ftrace_event_enable_disable(file, 0); /* * The do_for_each_event_file() is @@ -2396,15 +2400,11 @@ static void trace_module_add_events(struct module *mod) static void trace_module_remove_events(struct module *mod) { struct trace_event_call *call, *p; - bool clear_trace = false; down_write(&trace_event_sem); list_for_each_entry_safe(call, p, &ftrace_events, list) { - if (call->mod == mod) { - if (call->flags & TRACE_EVENT_FL_WAS_ENABLED) - clear_trace = true; + if (call->mod == mod) __trace_remove_event_call(call); - } } up_write(&trace_event_sem); @@ -2416,8 +2416,7 @@ static void trace_module_remove_events(struct module *mod) * over from this module may be passed to the new module events and * unexpected results may occur. */ - if (clear_trace) - tracing_reset_all_online_cpus(); + tracing_reset_all_online_cpus(); } static int trace_module_notify(struct notifier_block *self, diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index cb917cebae29..b17ec642793b 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -273,7 +273,7 @@ static int trace_selftest_ops(struct trace_array *tr, int cnt) goto out_free; if (cnt > 1) { if (trace_selftest_test_global_cnt == 0) - goto out; + goto out_free; } if (trace_selftest_test_dyn_cnt == 0) goto out_free; diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index dcc0d5fdf5a2..5e975fee0f5b 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -194,15 +194,6 @@ endef ifdef REGENERATE_PARSERS -# GPERF -# --------------------------------------------------------------------------- -quiet_cmd_gperf = GPERF $@ - cmd_gperf = gperf -t --output-file $@ -a -C -E -g -k 1,3,$$ -p -t $< - -.PRECIOUS: $(src)/%.hash.c_shipped -$(src)/%.hash.c_shipped: $(src)/%.gperf - $(call cmd,gperf) - # LEX # --------------------------------------------------------------------------- LEX_PREFIX = $(if $(LEX_PREFIX_${baseprereq}),$(LEX_PREFIX_${baseprereq}),yy) diff --git a/scripts/genksyms/lex.l b/scripts/genksyms/lex.l index 5dc25ee01c77..d29c774f51b6 100644 --- a/scripts/genksyms/lex.l +++ b/scripts/genksyms/lex.l @@ -292,7 +292,7 @@ repeat: case ST_TYPEOF_1: if (token == IDENT) { - if (is_reserved_word(yytext, yyleng) + if (is_reserved_word(yytext, yyleng) >= 0 || find_symbol(yytext, SYM_TYPEDEF, 1)) { yyless(0); diff --git a/scripts/genksyms/lex.lex.c_shipped b/scripts/genksyms/lex.lex.c_shipped index d5a7474b3e57..ba2fda8dfdb2 100644 --- a/scripts/genksyms/lex.lex.c_shipped +++ b/scripts/genksyms/lex.lex.c_shipped @@ -2101,7 +2101,7 @@ repeat: case ST_TYPEOF_1: if (token == IDENT) { - if (is_reserved_word(yytext, yyleng) + if (is_reserved_word(yytext, yyleng) >= 0 || find_symbol(yytext, SYM_TYPEDEF, 1)) { yyless(0); diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c index ad54a58d7dda..9074b477bff0 100644 --- a/tools/pci/pcitest.c +++ b/tools/pci/pcitest.c @@ -173,6 +173,7 @@ usage: "\t-D <dev> PCI endpoint test device {default: /dev/pci-endpoint-test.0}\n" "\t-b <bar num> BAR test (bar number between 0..5)\n" "\t-m <msi num> MSI test (msi number between 1..32)\n" + "\t-l Legacy IRQ test\n" "\t-r Read buffer test\n" "\t-w Write buffer test\n" "\t-c Copy buffer test\n" diff --git a/tools/testing/selftests/breakpoints/breakpoint_test.c b/tools/testing/selftests/breakpoints/breakpoint_test.c index f63356151ad4..901b85ea6a59 100644 --- a/tools/testing/selftests/breakpoints/breakpoint_test.c +++ b/tools/testing/selftests/breakpoints/breakpoint_test.c @@ -367,11 +367,11 @@ static void launch_tests(void) /* Icebp traps */ ptrace(PTRACE_CONT, child_pid, NULL, 0); - check_success("Test icebp"); + check_success("Test icebp\n"); /* Int 3 traps */ ptrace(PTRACE_CONT, child_pid, NULL, 0); - check_success("Test int 3 trap"); + check_success("Test int 3 trap\n"); ptrace(PTRACE_CONT, child_pid, NULL, 0); } diff --git a/tools/testing/selftests/capabilities/test_execve.c b/tools/testing/selftests/capabilities/test_execve.c index 763f37fecfb8..cf6778441381 100644 --- a/tools/testing/selftests/capabilities/test_execve.c +++ b/tools/testing/selftests/capabilities/test_execve.c @@ -1,7 +1,6 @@ #define _GNU_SOURCE #include <cap-ng.h> -#include <err.h> #include <linux/capability.h> #include <stdbool.h> #include <string.h> @@ -18,6 +17,8 @@ #include <sys/prctl.h> #include <sys/stat.h> +#include "../kselftest.h" + #ifndef PR_CAP_AMBIENT #define PR_CAP_AMBIENT 47 # define PR_CAP_AMBIENT_IS_SET 1 @@ -27,6 +28,7 @@ #endif static int nerrs; +static pid_t mpid; /* main() pid is used to avoid duplicate test counts */ static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap) { @@ -36,29 +38,32 @@ static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list int buf_len; buf_len = vsnprintf(buf, sizeof(buf), fmt, ap); - if (buf_len < 0) { - err(1, "vsnprintf failed"); - } - if (buf_len >= sizeof(buf)) { - errx(1, "vsnprintf output truncated"); - } + if (buf_len < 0) + ksft_exit_fail_msg("vsnprintf failed - %s\n", strerror(errno)); + + if (buf_len >= sizeof(buf)) + ksft_exit_fail_msg("vsnprintf output truncated\n"); + fd = open(filename, O_WRONLY); if (fd < 0) { if ((errno == ENOENT) && enoent_ok) return; - err(1, "open of %s failed", filename); + ksft_exit_fail_msg("open of %s failed - %s\n", + filename, strerror(errno)); } written = write(fd, buf, buf_len); if (written != buf_len) { if (written >= 0) { - errx(1, "short write to %s", filename); + ksft_exit_fail_msg("short write to %s\n", filename); } else { - err(1, "write to %s failed", filename); + ksft_exit_fail_msg("write to %s failed - %s\n", + filename, strerror(errno)); } } if (close(fd) != 0) { - err(1, "close of %s failed", filename); + ksft_exit_fail_msg("close of %s failed - %s\n", + filename, strerror(errno)); } } @@ -95,11 +100,12 @@ static bool create_and_enter_ns(uid_t inner_uid) */ if (unshare(CLONE_NEWNS) == 0) { - printf("[NOTE]\tUsing global UIDs for tests\n"); + ksft_print_msg("[NOTE]\tUsing global UIDs for tests\n"); if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) != 0) - err(1, "PR_SET_KEEPCAPS"); + ksft_exit_fail_msg("PR_SET_KEEPCAPS - %s\n", + strerror(errno)); if (setresuid(inner_uid, inner_uid, -1) != 0) - err(1, "setresuid"); + ksft_exit_fail_msg("setresuid - %s\n", strerror(errno)); // Re-enable effective caps capng_get_caps_process(); @@ -107,22 +113,24 @@ static bool create_and_enter_ns(uid_t inner_uid) if (capng_have_capability(CAPNG_PERMITTED, i)) capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, i); if (capng_apply(CAPNG_SELECT_CAPS) != 0) - err(1, "capng_apply"); + ksft_exit_fail_msg( + "capng_apply - %s\n", strerror(errno)); have_outer_privilege = true; } else if (unshare(CLONE_NEWUSER | CLONE_NEWNS) == 0) { - printf("[NOTE]\tUsing a user namespace for tests\n"); + ksft_print_msg("[NOTE]\tUsing a user namespace for tests\n"); maybe_write_file("/proc/self/setgroups", "deny"); write_file("/proc/self/uid_map", "%d %d 1", inner_uid, outer_uid); write_file("/proc/self/gid_map", "0 %d 1", outer_gid); have_outer_privilege = false; } else { - errx(1, "must be root or be able to create a userns"); + ksft_exit_skip("must be root or be able to create a userns\n"); } if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL) != 0) - err(1, "remount everything private"); + ksft_exit_fail_msg("remount everything private - %s\n", + strerror(errno)); return have_outer_privilege; } @@ -131,20 +139,22 @@ static void chdir_to_tmpfs(void) { char cwd[PATH_MAX]; if (getcwd(cwd, sizeof(cwd)) != cwd) - err(1, "getcwd"); + ksft_exit_fail_msg("getcwd - %s\n", strerror(errno)); if (mount("private_tmp", ".", "tmpfs", 0, "mode=0777") != 0) - err(1, "mount private tmpfs"); + ksft_exit_fail_msg("mount private tmpfs - %s\n", + strerror(errno)); if (chdir(cwd) != 0) - err(1, "chdir to private tmpfs"); + ksft_exit_fail_msg("chdir to private tmpfs - %s\n", + strerror(errno)); } static void copy_fromat_to(int fromfd, const char *fromname, const char *toname) { int from = openat(fromfd, fromname, O_RDONLY); if (from == -1) - err(1, "open copy source"); + ksft_exit_fail_msg("open copy source - %s\n", strerror(errno)); int to = open(toname, O_CREAT | O_WRONLY | O_EXCL, 0700); @@ -154,10 +164,11 @@ static void copy_fromat_to(int fromfd, const char *fromname, const char *toname) if (sz == 0) break; if (sz < 0) - err(1, "read"); + ksft_exit_fail_msg("read - %s\n", strerror(errno)); if (write(to, buf, sz) != sz) - err(1, "write"); /* no short writes on tmpfs */ + /* no short writes on tmpfs */ + ksft_exit_fail_msg("write - %s\n", strerror(errno)); } close(from); @@ -174,18 +185,20 @@ static bool fork_wait(void) int status; if (waitpid(child, &status, 0) != child || !WIFEXITED(status)) { - printf("[FAIL]\tChild died\n"); + ksft_print_msg("Child died\n"); nerrs++; } else if (WEXITSTATUS(status) != 0) { - printf("[FAIL]\tChild failed\n"); + ksft_print_msg("Child failed\n"); nerrs++; } else { - printf("[OK]\tChild succeeded\n"); + /* don't print this message for mpid */ + if (getpid() != mpid) + ksft_test_result_pass("Passed\n"); } - return false; } else { - err(1, "fork"); + ksft_exit_fail_msg("fork - %s\n", strerror(errno)); + return false; } } @@ -195,7 +208,7 @@ static void exec_other_validate_cap(const char *name, execl(name, name, (eff ? "1" : "0"), (perm ? "1" : "0"), (inh ? "1" : "0"), (ambient ? "1" : "0"), NULL); - err(1, "execl"); + ksft_exit_fail_msg("execl - %s\n", strerror(errno)); } static void exec_validate_cap(bool eff, bool perm, bool inh, bool ambient) @@ -209,7 +222,8 @@ static int do_tests(int uid, const char *our_path) int ourpath_fd = open(our_path, O_RDONLY | O_DIRECTORY); if (ourpath_fd == -1) - err(1, "open '%s'", our_path); + ksft_exit_fail_msg("open '%s' - %s\n", + our_path, strerror(errno)); chdir_to_tmpfs(); @@ -221,30 +235,30 @@ static int do_tests(int uid, const char *our_path) copy_fromat_to(ourpath_fd, "validate_cap", "validate_cap_suidroot"); if (chown("validate_cap_suidroot", 0, -1) != 0) - err(1, "chown"); + ksft_exit_fail_msg("chown - %s\n", strerror(errno)); if (chmod("validate_cap_suidroot", S_ISUID | 0700) != 0) - err(1, "chmod"); + ksft_exit_fail_msg("chmod - %s\n", strerror(errno)); copy_fromat_to(ourpath_fd, "validate_cap", "validate_cap_suidnonroot"); if (chown("validate_cap_suidnonroot", uid + 1, -1) != 0) - err(1, "chown"); + ksft_exit_fail_msg("chown - %s\n", strerror(errno)); if (chmod("validate_cap_suidnonroot", S_ISUID | 0700) != 0) - err(1, "chmod"); + ksft_exit_fail_msg("chmod - %s\n", strerror(errno)); copy_fromat_to(ourpath_fd, "validate_cap", "validate_cap_sgidroot"); if (chown("validate_cap_sgidroot", -1, 0) != 0) - err(1, "chown"); + ksft_exit_fail_msg("chown - %s\n", strerror(errno)); if (chmod("validate_cap_sgidroot", S_ISGID | 0710) != 0) - err(1, "chmod"); + ksft_exit_fail_msg("chmod - %s\n", strerror(errno)); copy_fromat_to(ourpath_fd, "validate_cap", "validate_cap_sgidnonroot"); if (chown("validate_cap_sgidnonroot", -1, gid + 1) != 0) - err(1, "chown"); + ksft_exit_fail_msg("chown - %s\n", strerror(errno)); if (chmod("validate_cap_sgidnonroot", S_ISGID | 0710) != 0) - err(1, "chmod"); + ksft_exit_fail_msg("chmod - %s\n", strerror(errno)); } capng_get_caps_process(); @@ -252,147 +266,162 @@ static int do_tests(int uid, const char *our_path) /* Make sure that i starts out clear */ capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE); if (capng_apply(CAPNG_SELECT_CAPS) != 0) - err(1, "capng_apply"); + ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno)); if (uid == 0) { - printf("[RUN]\tRoot => ep\n"); + ksft_print_msg("[RUN]\tRoot => ep\n"); if (fork_wait()) exec_validate_cap(true, true, false, false); } else { - printf("[RUN]\tNon-root => no caps\n"); + ksft_print_msg("[RUN]\tNon-root => no caps\n"); if (fork_wait()) exec_validate_cap(false, false, false, false); } - printf("[OK]\tCheck cap_ambient manipulation rules\n"); + ksft_print_msg("Check cap_ambient manipulation rules\n"); /* We should not be able to add ambient caps yet. */ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != -1 || errno != EPERM) { if (errno == EINVAL) - printf("[FAIL]\tPR_CAP_AMBIENT_RAISE isn't supported\n"); + ksft_test_result_fail( + "PR_CAP_AMBIENT_RAISE isn't supported\n"); else - printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have failed eith EPERM on a non-inheritable cap\n"); + ksft_test_result_fail( + "PR_CAP_AMBIENT_RAISE should have failed eith EPERM on a non-inheritable cap\n"); return 1; } - printf("[OK]\tPR_CAP_AMBIENT_RAISE failed on non-inheritable cap\n"); + ksft_test_result_pass( + "PR_CAP_AMBIENT_RAISE failed on non-inheritable cap\n"); capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_RAW); capng_update(CAPNG_DROP, CAPNG_PERMITTED, CAP_NET_RAW); capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, CAP_NET_RAW); if (capng_apply(CAPNG_SELECT_CAPS) != 0) - err(1, "capng_apply"); + ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno)); if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_RAW, 0, 0, 0) != -1 || errno != EPERM) { - printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have failed on a non-permitted cap\n"); + ksft_test_result_fail( + "PR_CAP_AMBIENT_RAISE should have failed on a non-permitted cap\n"); return 1; } - printf("[OK]\tPR_CAP_AMBIENT_RAISE failed on non-permitted cap\n"); + ksft_test_result_pass( + "PR_CAP_AMBIENT_RAISE failed on non-permitted cap\n"); capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE); if (capng_apply(CAPNG_SELECT_CAPS) != 0) - err(1, "capng_apply"); + ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno)); if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) { - printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have succeeded\n"); + ksft_test_result_fail( + "PR_CAP_AMBIENT_RAISE should have succeeded\n"); return 1; } - printf("[OK]\tPR_CAP_AMBIENT_RAISE worked\n"); + ksft_test_result_pass("PR_CAP_AMBIENT_RAISE worked\n"); if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 1) { - printf("[FAIL]\tPR_CAP_AMBIENT_IS_SET is broken\n"); + ksft_test_result_fail("PR_CAP_AMBIENT_IS_SET is broken\n"); return 1; } if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0, 0) != 0) - err(1, "PR_CAP_AMBIENT_CLEAR_ALL"); + ksft_exit_fail_msg("PR_CAP_AMBIENT_CLEAR_ALL - %s\n", + strerror(errno)); if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) { - printf("[FAIL]\tPR_CAP_AMBIENT_CLEAR_ALL didn't work\n"); + ksft_test_result_fail( + "PR_CAP_AMBIENT_CLEAR_ALL didn't work\n"); return 1; } if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) - err(1, "PR_CAP_AMBIENT_RAISE"); + ksft_exit_fail_msg("PR_CAP_AMBIENT_RAISE - %s\n", + strerror(errno)); capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE); if (capng_apply(CAPNG_SELECT_CAPS) != 0) - err(1, "capng_apply"); + ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno)); if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) { - printf("[FAIL]\tDropping I should have dropped A\n"); + ksft_test_result_fail("Dropping I should have dropped A\n"); return 1; } - printf("[OK]\tBasic manipulation appears to work\n"); + ksft_test_result_pass("Basic manipulation appears to work\n"); capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE); if (capng_apply(CAPNG_SELECT_CAPS) != 0) - err(1, "capng_apply"); + ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno)); if (uid == 0) { - printf("[RUN]\tRoot +i => eip\n"); + ksft_print_msg("[RUN]\tRoot +i => eip\n"); if (fork_wait()) exec_validate_cap(true, true, true, false); } else { - printf("[RUN]\tNon-root +i => i\n"); + ksft_print_msg("[RUN]\tNon-root +i => i\n"); if (fork_wait()) exec_validate_cap(false, false, true, false); } if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) - err(1, "PR_CAP_AMBIENT_RAISE"); + ksft_exit_fail_msg("PR_CAP_AMBIENT_RAISE - %s\n", + strerror(errno)); - printf("[RUN]\tUID %d +ia => eipa\n", uid); + ksft_print_msg("[RUN]\tUID %d +ia => eipa\n", uid); if (fork_wait()) exec_validate_cap(true, true, true, true); /* The remaining tests need real privilege */ if (!have_outer_privilege) { - printf("[SKIP]\tSUID/SGID tests (needs privilege)\n"); + ksft_test_result_skip("SUID/SGID tests (needs privilege)\n"); goto done; } if (uid == 0) { - printf("[RUN]\tRoot +ia, suidroot => eipa\n"); + ksft_print_msg("[RUN]\tRoot +ia, suidroot => eipa\n"); if (fork_wait()) exec_other_validate_cap("./validate_cap_suidroot", true, true, true, true); - printf("[RUN]\tRoot +ia, suidnonroot => ip\n"); + ksft_print_msg("[RUN]\tRoot +ia, suidnonroot => ip\n"); if (fork_wait()) exec_other_validate_cap("./validate_cap_suidnonroot", false, true, true, false); - printf("[RUN]\tRoot +ia, sgidroot => eipa\n"); + ksft_print_msg("[RUN]\tRoot +ia, sgidroot => eipa\n"); if (fork_wait()) exec_other_validate_cap("./validate_cap_sgidroot", true, true, true, true); if (fork_wait()) { - printf("[RUN]\tRoot, gid != 0, +ia, sgidroot => eip\n"); + ksft_print_msg( + "[RUN]\tRoot, gid != 0, +ia, sgidroot => eip\n"); if (setresgid(1, 1, 1) != 0) - err(1, "setresgid"); + ksft_exit_fail_msg("setresgid - %s\n", + strerror(errno)); exec_other_validate_cap("./validate_cap_sgidroot", true, true, true, false); } - printf("[RUN]\tRoot +ia, sgidnonroot => eip\n"); + ksft_print_msg("[RUN]\tRoot +ia, sgidnonroot => eip\n"); if (fork_wait()) exec_other_validate_cap("./validate_cap_sgidnonroot", true, true, true, false); } else { - printf("[RUN]\tNon-root +ia, sgidnonroot => i\n"); - exec_other_validate_cap("./validate_cap_sgidnonroot", + ksft_print_msg("[RUN]\tNon-root +ia, sgidnonroot => i\n"); + if (fork_wait()) + exec_other_validate_cap("./validate_cap_sgidnonroot", false, false, true, false); if (fork_wait()) { - printf("[RUN]\tNon-root +ia, sgidroot => i\n"); + ksft_print_msg("[RUN]\tNon-root +ia, sgidroot => i\n"); if (setresgid(1, 1, 1) != 0) - err(1, "setresgid"); + ksft_exit_fail_msg("setresgid - %s\n", + strerror(errno)); exec_other_validate_cap("./validate_cap_sgidroot", false, false, true, false); } } done: + ksft_print_cnts(); return nerrs ? 1 : 0; } @@ -400,23 +429,29 @@ int main(int argc, char **argv) { char *tmp1, *tmp2, *our_path; + ksft_print_header(); + /* Find our path */ tmp1 = strdup(argv[0]); if (!tmp1) - err(1, "strdup"); + ksft_exit_fail_msg("strdup - %s\n", strerror(errno)); tmp2 = dirname(tmp1); our_path = strdup(tmp2); if (!our_path) - err(1, "strdup"); + ksft_exit_fail_msg("strdup - %s\n", strerror(errno)); free(tmp1); + mpid = getpid(); + if (fork_wait()) { - printf("[RUN]\t+++ Tests with uid == 0 +++\n"); + ksft_print_msg("[RUN]\t+++ Tests with uid == 0 +++\n"); return do_tests(0, our_path); } + ksft_print_msg("==================================================\n"); + if (fork_wait()) { - printf("[RUN]\t+++ Tests with uid != 0 +++\n"); + ksft_print_msg("[RUN]\t+++ Tests with uid != 0 +++\n"); return do_tests(1, our_path); } diff --git a/tools/testing/selftests/capabilities/validate_cap.c b/tools/testing/selftests/capabilities/validate_cap.c index dd3c45f7b23c..694cd73d4493 100644 --- a/tools/testing/selftests/capabilities/validate_cap.c +++ b/tools/testing/selftests/capabilities/validate_cap.c @@ -1,5 +1,4 @@ #include <cap-ng.h> -#include <err.h> #include <linux/capability.h> #include <stdbool.h> #include <string.h> @@ -7,6 +6,8 @@ #include <sys/prctl.h> #include <sys/auxv.h> +#include "../kselftest.h" + #ifndef PR_CAP_AMBIENT #define PR_CAP_AMBIENT 47 # define PR_CAP_AMBIENT_IS_SET 1 @@ -25,8 +26,10 @@ static bool bool_arg(char **argv, int i) return false; else if (!strcmp(argv[i], "1")) return true; - else - errx(1, "wrong argv[%d]", i); + else { + ksft_exit_fail_msg("wrong argv[%d]\n", i); + return false; + } } int main(int argc, char **argv) @@ -39,7 +42,7 @@ int main(int argc, char **argv) */ if (argc != 5) - errx(1, "wrong argc"); + ksft_exit_fail_msg("wrong argc\n"); #ifdef HAVE_GETAUXVAL if (getauxval(AT_SECURE)) @@ -51,23 +54,26 @@ int main(int argc, char **argv) capng_get_caps_process(); if (capng_have_capability(CAPNG_EFFECTIVE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 1)) { - printf("[FAIL]\tWrong effective state%s\n", atsec); + ksft_print_msg("Wrong effective state%s\n", atsec); return 1; } + if (capng_have_capability(CAPNG_PERMITTED, CAP_NET_BIND_SERVICE) != bool_arg(argv, 2)) { - printf("[FAIL]\tWrong permitted state%s\n", atsec); + ksft_print_msg("Wrong permitted state%s\n", atsec); return 1; } + if (capng_have_capability(CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 3)) { - printf("[FAIL]\tWrong inheritable state%s\n", atsec); + ksft_print_msg("Wrong inheritable state%s\n", atsec); return 1; } if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != bool_arg(argv, 4)) { - printf("[FAIL]\tWrong ambient state%s\n", atsec); + ksft_print_msg("Wrong ambient state%s\n", atsec); return 1; } - printf("[OK]\tCapabilities after execve were correct\n"); + ksft_print_msg("%s: Capabilities after execve were correct\n", + "validate_cap:"); return 0; } diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh index 98b1d6565f2c..b18b253d7bfb 100755 --- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh +++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh @@ -28,6 +28,12 @@ prerequisite() echo "CPU online/offline summary:" online_cpus=`cat $SYSFS/devices/system/cpu/online` online_max=${online_cpus##*-} + + if [[ "$online_cpus" = "$online_max" ]]; then + echo "$msg: since there is only one cpu: $online_cpus" + exit 0 + fi + echo -e "\t Cpus in online state: $online_cpus" offline_cpus=`cat $SYSFS/devices/system/cpu/offline` @@ -89,8 +95,10 @@ online_cpu_expect_success() if ! online_cpu $cpu; then echo $FUNCNAME $cpu: unexpected fail >&2 + exit 1 elif ! cpu_is_online $cpu; then echo $FUNCNAME $cpu: unexpected offline >&2 + exit 1 fi } @@ -100,8 +108,10 @@ online_cpu_expect_fail() if online_cpu $cpu 2> /dev/null; then echo $FUNCNAME $cpu: unexpected success >&2 + exit 1 elif ! cpu_is_offline $cpu; then echo $FUNCNAME $cpu: unexpected online >&2 + exit 1 fi } @@ -111,8 +121,10 @@ offline_cpu_expect_success() if ! offline_cpu $cpu; then echo $FUNCNAME $cpu: unexpected fail >&2 + exit 1 elif ! cpu_is_offline $cpu; then echo $FUNCNAME $cpu: unexpected offline >&2 + exit 1 fi } @@ -122,8 +134,10 @@ offline_cpu_expect_fail() if offline_cpu $cpu 2> /dev/null; then echo $FUNCNAME $cpu: unexpected success >&2 + exit 1 elif ! cpu_is_online $cpu; then echo $FUNCNAME $cpu: unexpected offline >&2 + exit 1 fi } diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index 14a03ea1e21d..abc706cf7702 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -8,15 +8,18 @@ # Released under the terms of the GPL v2. usage() { # errno [message] -[ "$2" ] && echo $2 +[ ! -z "$2" ] && echo $2 echo "Usage: ftracetest [options] [testcase(s)] [testcase-directory(s)]" echo " Options:" echo " -h|--help Show help message" echo " -k|--keep Keep passed test logs" echo " -v|--verbose Increase verbosity of test messages" echo " -vv Alias of -v -v (Show all results in stdout)" +echo " -vvv Alias of -v -v -v (Show all commands immediately)" +echo " --fail-unsupported Treat UNSUPPORTED as a failure" echo " -d|--debug Debug mode (trace all shell commands)" echo " -l|--logdir <dir> Save logs on the <dir>" +echo " If <dir> is -, all logs output in console only" exit $1 } @@ -47,7 +50,7 @@ parse_opts() { # opts local OPT_TEST_CASES= local OPT_TEST_DIR= - while [ "$1" ]; do + while [ ! -z "$1" ]; do case "$1" in --help|-h) usage 0 @@ -56,15 +59,20 @@ parse_opts() { # opts KEEP_LOG=1 shift 1 ;; - --verbose|-v|-vv) + --verbose|-v|-vv|-vvv) VERBOSE=$((VERBOSE + 1)) [ $1 = '-vv' ] && VERBOSE=$((VERBOSE + 1)) + [ $1 = '-vvv' ] && VERBOSE=$((VERBOSE + 2)) shift 1 ;; --debug|-d) DEBUG=1 shift 1 ;; + --fail-unsupported) + UNSUPPORTED_RESULT=1 + shift 1 + ;; --logdir|-l) LOG_DIR=$2 shift 2 @@ -88,7 +96,7 @@ parse_opts() { # opts ;; esac done - if [ "$OPT_TEST_CASES" ]; then + if [ ! -z "$OPT_TEST_CASES" ]; then TEST_CASES=$OPT_TEST_CASES fi } @@ -108,6 +116,7 @@ LOG_DIR=$TOP_DIR/logs/`date +%Y%m%d-%H%M%S`/ KEEP_LOG=0 DEBUG=0 VERBOSE=0 +UNSUPPORTED_RESULT=0 # Parse command-line options parse_opts $* @@ -119,14 +128,20 @@ if [ -z "$TRACING_DIR" -o ! -d "$TRACING_DIR" ]; then fi # Preparing logs -LOG_FILE=$LOG_DIR/ftracetest.log -mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR" -date > $LOG_FILE +if [ "x$LOG_DIR" = "x-" ]; then + LOG_FILE= + date +else + LOG_FILE=$LOG_DIR/ftracetest.log + mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR" + date > $LOG_FILE +fi + prlog() { # messages - echo "$@" | tee -a $LOG_FILE + [ -z "$LOG_FILE" ] && echo "$@" || echo "$@" | tee -a $LOG_FILE } catlog() { #file - cat $1 | tee -a $LOG_FILE + [ -z "$LOG_FILE" ] && cat $1 || cat $1 | tee -a $LOG_FILE } prlog "=== Ftrace unit tests ===" @@ -187,7 +202,7 @@ eval_result() { # sigval $UNSUPPORTED) prlog " [UNSUPPORTED]" UNSUPPORTED_CASES="$UNSUPPORTED_CASES $CASENO" - return 1 # this is not a bug, but the result should be reported. + return $UNSUPPORTED_RESULT # depends on use case ;; $XFAIL) prlog " [XFAIL]" @@ -247,12 +262,20 @@ __run_test() { # testfile # Run one test case run_test() { # testfile local testname=`basename $1` - local testlog=`mktemp $LOG_DIR/${testname}-log.XXXXXX` + if [ ! -z "$LOG_FILE" ] ; then + local testlog=`mktemp $LOG_DIR/${testname}-log.XXXXXX` + else + local testlog=/proc/self/fd/1 + fi export TMPDIR=`mktemp -d /tmp/ftracetest-dir.XXXXXX` testcase $1 echo "execute$INSTANCE: "$1 > $testlog SIG_RESULT=0 - if [ $VERBOSE -ge 2 ]; then + if [ -z "$LOG_FILE" ]; then + __run_test $1 2>&1 + elif [ $VERBOSE -ge 3 ]; then + __run_test $1 | tee -a $testlog 2>&1 + elif [ $VERBOSE -eq 2 ]; then __run_test $1 2>> $testlog | tee -a $testlog else __run_test $1 >> $testlog 2>&1 @@ -260,9 +283,9 @@ run_test() { # testfile eval_result $SIG_RESULT if [ $? -eq 0 ]; then # Remove test log if the test was done as it was expected. - [ $KEEP_LOG -eq 0 ] && rm $testlog + [ $KEEP_LOG -eq 0 -a ! -z "$LOG_FILE" ] && rm $testlog else - [ $VERBOSE -ge 1 ] && catlog $testlog + [ $VERBOSE -eq 1 -o $VERBOSE -eq 2 ] && catlog $testlog TOTAL_RESULT=1 fi rm -rf $TMPDIR diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c index d24ab7421e73..54cd5c414e82 100644 --- a/tools/testing/selftests/futex/functional/futex_requeue_pi.c +++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c @@ -394,9 +394,11 @@ int main(int argc, char *argv[]) } } - printf("%s: Test requeue functionality\n", basename(argv[0])); - printf("\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n", - broadcast, locked, owner, timeout_ns); + ksft_print_header(); + ksft_print_msg("%s: Test requeue functionality\n", basename(argv[0])); + ksft_print_msg( + "\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n", + broadcast, locked, owner, timeout_ns); /* * FIXME: unit_test is obsolete now that we parse options and the diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c index e0a798ad0d21..08187a16507f 100644 --- a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c +++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c @@ -78,7 +78,8 @@ int main(int argc, char *argv[]) } } - printf("%s: Detect mismatched requeue_pi operations\n", + ksft_print_header(); + ksft_print_msg("%s: Detect mismatched requeue_pi operations\n", basename(argv[0])); if (pthread_create(&child, NULL, blocking_child, NULL)) { diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c index 982f83577501..f0542a344d95 100644 --- a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c +++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c @@ -143,9 +143,10 @@ int main(int argc, char *argv[]) } } - printf("%s: Test signal handling during requeue_pi\n", + ksft_print_header(); + ksft_print_msg("%s: Test signal handling during requeue_pi\n", basename(argv[0])); - printf("\tArguments: <none>\n"); + ksft_print_msg("\tArguments: <none>\n"); sa.sa_handler = handle_signal; sigemptyset(&sa.sa_mask); diff --git a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c index bdc48dc047e5..6216de828093 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c +++ b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c @@ -97,8 +97,10 @@ int main(int argc, char **argv) } } - printf("%s: Test the futex value of private file mappings in FUTEX_WAIT\n", - basename(argv[0])); + ksft_print_header(); + ksft_print_msg( + "%s: Test the futex value of private file mappings in FUTEX_WAIT\n", + basename(argv[0])); ret = pthread_create(&thr, NULL, thr_futex_wait, NULL); if (ret < 0) { diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c index 6aadd560366e..bab3dfe1787f 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c +++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c @@ -68,9 +68,10 @@ int main(int argc, char *argv[]) } } - printf("%s: Block on a futex and wait for timeout\n", + ksft_print_header(); + ksft_print_msg("%s: Block on a futex and wait for timeout\n", basename(argv[0])); - printf("\tArguments: timeout=%ldns\n", timeout_ns); + ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns); /* initialize timeout */ to.tv_sec = 0; diff --git a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c index d237a8b702f0..26975322545b 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c +++ b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c @@ -99,7 +99,8 @@ int main(int argc, char **argv) exit(1); } - printf("%s: Test the uninitialized futex value in FUTEX_WAIT\n", + ksft_print_header(); + ksft_print_msg("%s: Test the uninitialized futex value in FUTEX_WAIT\n", basename(argv[0])); diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c index 9a2c56fa7305..da15a63269b4 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c +++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c @@ -64,7 +64,8 @@ int main(int argc, char *argv[]) } } - printf("%s: Test the unexpected futex value in FUTEX_WAIT\n", + ksft_print_header(); + ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n", basename(argv[0])); info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); diff --git a/tools/testing/selftests/futex/include/logging.h b/tools/testing/selftests/futex/include/logging.h index 4e7944984fbb..01989644e50a 100644 --- a/tools/testing/selftests/futex/include/logging.h +++ b/tools/testing/selftests/futex/include/logging.h @@ -109,22 +109,20 @@ void log_verbosity(int level) */ void print_result(const char *test_name, int ret) { - const char *result = "Unknown return code"; - switch (ret) { case RET_PASS: - ksft_inc_pass_cnt(); - result = PASS; - break; + ksft_test_result_pass("%s\n", test_name); + ksft_print_cnts(); + return; case RET_ERROR: - result = ERROR; - break; + ksft_test_result_error("%s\n", test_name); + ksft_print_cnts(); + return; case RET_FAIL: - ksft_inc_fail_cnt(); - result = FAIL; - break; + ksft_test_result_fail("%s\n", test_name); + ksft_print_cnts(); + return; } - printf("selftests: %s [%s]\n", test_name, result); } /* log level macros */ diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 08e90c2cc5cb..1ae565ed9bf0 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -19,7 +19,8 @@ #define KSFT_FAIL 1 #define KSFT_XFAIL 2 #define KSFT_XPASS 3 -#define KSFT_SKIP 4 +/* Treat skip as pass */ +#define KSFT_SKIP KSFT_PASS /* counters */ struct ksft_count { @@ -28,6 +29,7 @@ struct ksft_count { unsigned int ksft_xfail; unsigned int ksft_xpass; unsigned int ksft_xskip; + unsigned int ksft_error; }; static struct ksft_count ksft_cnt; @@ -36,7 +38,7 @@ static inline int ksft_test_num(void) { return ksft_cnt.ksft_pass + ksft_cnt.ksft_fail + ksft_cnt.ksft_xfail + ksft_cnt.ksft_xpass + - ksft_cnt.ksft_xskip; + ksft_cnt.ksft_xskip + ksft_cnt.ksft_error; } static inline void ksft_inc_pass_cnt(void) { ksft_cnt.ksft_pass++; } @@ -44,6 +46,14 @@ static inline void ksft_inc_fail_cnt(void) { ksft_cnt.ksft_fail++; } static inline void ksft_inc_xfail_cnt(void) { ksft_cnt.ksft_xfail++; } static inline void ksft_inc_xpass_cnt(void) { ksft_cnt.ksft_xpass++; } static inline void ksft_inc_xskip_cnt(void) { ksft_cnt.ksft_xskip++; } +static inline void ksft_inc_error_cnt(void) { ksft_cnt.ksft_error++; } + +static inline int ksft_get_pass_cnt(void) { return ksft_cnt.ksft_pass; } +static inline int ksft_get_fail_cnt(void) { return ksft_cnt.ksft_fail; } +static inline int ksft_get_xfail_cnt(void) { return ksft_cnt.ksft_xfail; } +static inline int ksft_get_xpass_cnt(void) { return ksft_cnt.ksft_xpass; } +static inline int ksft_get_xskip_cnt(void) { return ksft_cnt.ksft_xskip; } +static inline int ksft_get_error_cnt(void) { return ksft_cnt.ksft_error; } static inline void ksft_print_header(void) { @@ -52,6 +62,10 @@ static inline void ksft_print_header(void) static inline void ksft_print_cnts(void) { + printf("Pass %d Fail %d Xfail %d Xpass %d Skip %d Error %d\n", + ksft_cnt.ksft_pass, ksft_cnt.ksft_fail, + ksft_cnt.ksft_xfail, ksft_cnt.ksft_xpass, + ksft_cnt.ksft_xskip, ksft_cnt.ksft_error); printf("1..%d\n", ksft_test_num()); } @@ -101,6 +115,18 @@ static inline void ksft_test_result_skip(const char *msg, ...) va_end(args); } +static inline void ksft_test_result_error(const char *msg, ...) +{ + va_list args; + + ksft_cnt.ksft_error++; + + va_start(args, msg); + printf("not ok %d # error ", ksft_test_num()); + vprintf(msg, args); + va_end(args); +} + static inline int ksft_exit_pass(void) { ksft_print_cnts(); diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index c56f72e07cd7..e81bd28bdd89 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -51,6 +51,9 @@ #define __KSELFTEST_HARNESS_H #define _GNU_SOURCE +#include <asm/types.h> +#include <errno.h> +#include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -84,6 +87,14 @@ * E.g., #define TH_LOG_ENABLED 1 * * If no definition is provided, logging is enabled by default. + * + * If there is no way to print an error message for the process running the + * test (e.g. not allowed to write to stderr), it is still possible to get the + * ASSERT_* number for which the test failed. This behavior can be enabled by + * writing `_metadata->no_print = true;` before the check sequence that is + * unable to print. When an error occur, instead of printing an error message + * and calling `abort(3)`, the test process call `_exit(2)` with the assert + * number as argument, which is then printed by the parent process. */ #define TH_LOG(fmt, ...) do { \ if (TH_LOG_ENABLED) \ @@ -555,12 +566,18 @@ * return while still providing an optional block to the API consumer. */ #define OPTIONAL_HANDLER(_assert) \ - for (; _metadata->trigger; _metadata->trigger = __bail(_assert)) + for (; _metadata->trigger; _metadata->trigger = \ + __bail(_assert, _metadata->no_print, _metadata->step)) + +#define __INC_STEP(_metadata) \ + if (_metadata->passed && _metadata->step < 255) \ + _metadata->step++; #define __EXPECT(_expected, _seen, _t, _assert) do { \ /* Avoid multiple evaluation of the cases */ \ __typeof__(_expected) __exp = (_expected); \ __typeof__(_seen) __seen = (_seen); \ + if (_assert) __INC_STEP(_metadata); \ if (!(__exp _t __seen)) { \ unsigned long long __exp_print = (uintptr_t)__exp; \ unsigned long long __seen_print = (uintptr_t)__seen; \ @@ -576,6 +593,7 @@ #define __EXPECT_STR(_expected, _seen, _t, _assert) do { \ const char *__exp = (_expected); \ const char *__seen = (_seen); \ + if (_assert) __INC_STEP(_metadata); \ if (!(strcmp(__exp, __seen) _t 0)) { \ __TH_LOG("Expected '%s' %s '%s'.", __exp, #_t, __seen); \ _metadata->passed = 0; \ @@ -590,6 +608,8 @@ struct __test_metadata { int termsig; int passed; int trigger; /* extra handler after the evaluation */ + __u8 step; + bool no_print; /* manual trigger when TH_LOG_STREAM is not available */ struct __test_metadata *prev, *next; }; @@ -634,10 +654,13 @@ static inline void __register_test(struct __test_metadata *t) } } -static inline int __bail(int for_realz) +static inline int __bail(int for_realz, bool no_print, __u8 step) { - if (for_realz) + if (for_realz) { + if (no_print) + _exit(step); abort(); + } return 0; } @@ -655,18 +678,24 @@ void __run_test(struct __test_metadata *t) t->passed = 0; } else if (child_pid == 0) { t->fn(t); - _exit(t->passed); + /* return the step that failed or 0 */ + _exit(t->passed ? 0 : t->step); } else { /* TODO(wad) add timeout support. */ waitpid(child_pid, &status, 0); if (WIFEXITED(status)) { - t->passed = t->termsig == -1 ? WEXITSTATUS(status) : 0; + t->passed = t->termsig == -1 ? !WEXITSTATUS(status) : 0; if (t->termsig != -1) { fprintf(TH_LOG_STREAM, "%s: Test exited normally " "instead of by signal (code: %d)\n", t->name, WEXITSTATUS(status)); + } else if (!t->passed) { + fprintf(TH_LOG_STREAM, + "%s: Test failed at step #%d\n", + t->name, + WEXITSTATUS(status)); } } else if (WIFSIGNALED(status)) { t->passed = 0; diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 959273c3a52e..693616651da5 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -11,15 +11,26 @@ TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES)) all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) +.ONESHELL: define RUN_TESTS - @for TEST in $(TEST_GEN_PROGS) $(TEST_PROGS); do \ + @test_num=`echo 0`; + @echo "TAP version 13"; + @for TEST in $(1); do \ BASENAME_TEST=`basename $$TEST`; \ - cd `dirname $$TEST`; (./$$BASENAME_TEST && echo "selftests: $$BASENAME_TEST [PASS]") || echo "selftests: $$BASENAME_TEST [FAIL]"; cd -;\ + test_num=`echo $$test_num+1 | bc`; \ + echo "selftests: $$BASENAME_TEST"; \ + echo "========================================"; \ + if [ ! -x $$BASENAME_TEST ]; then \ + echo "selftests: Warning: file $$BASENAME_TEST is not executable, correct this.";\ + echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; \ + else \ + cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\ + fi; \ done; endef run_tests: all - $(RUN_TESTS) + $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_PROGS)) define INSTALL_RULE @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \ diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c index 67908b18f035..7f3617274bf5 100644 --- a/tools/testing/selftests/memfd/fuse_test.c +++ b/tools/testing/selftests/memfd/fuse_test.c @@ -33,7 +33,7 @@ #include <unistd.h> #define MFD_DEF_SIZE 8192 -#define STACK_SIZE 65535 +#define STACK_SIZE 65536 static int sys_memfd_create(const char *name, unsigned int flags) diff --git a/tools/testing/selftests/nsfs/config b/tools/testing/selftests/nsfs/config new file mode 100644 index 000000000000..598d0a225fc9 --- /dev/null +++ b/tools/testing/selftests/nsfs/config @@ -0,0 +1,3 @@ +CONFIG_USER_NS=y +CONFIG_UTS_NS=y +CONFIG_PID_NS=y diff --git a/tools/testing/selftests/pstore/.gitignore b/tools/testing/selftests/pstore/.gitignore new file mode 100644 index 000000000000..5a4a26e5464b --- /dev/null +++ b/tools/testing/selftests/pstore/.gitignore @@ -0,0 +1,2 @@ +logs +*uuid diff --git a/tools/testing/selftests/ptp/Makefile b/tools/testing/selftests/ptp/Makefile index 83dd42b2129e..d4064c742c26 100644 --- a/tools/testing/selftests/ptp/Makefile +++ b/tools/testing/selftests/ptp/Makefile @@ -1,3 +1,4 @@ +CFLAGS += -I../../../../usr/include/ TEST_PROGS := testptp LDLIBS += -lrt all: $(TEST_PROGS) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 73f5ea6778ce..4d6f92a9df6b 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -107,7 +107,7 @@ TEST(mode_strict_support) ASSERT_EQ(0, ret) { TH_LOG("Kernel does not support CONFIG_SECCOMP"); } - syscall(__NR_exit, 1); + syscall(__NR_exit, 0); } TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL) diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c index ccd07343d418..7d406c3973ba 100644 --- a/tools/testing/selftests/sigaltstack/sas.c +++ b/tools/testing/selftests/sigaltstack/sas.c @@ -17,6 +17,8 @@ #include <assert.h> #include <errno.h> +#include "../kselftest.h" + #ifndef SS_AUTODISARM #define SS_AUTODISARM (1U << 31) #endif @@ -41,8 +43,7 @@ void my_usr1(int sig, siginfo_t *si, void *u) if (sp < (unsigned long)sstack || sp >= (unsigned long)sstack + SIGSTKSZ) { - printf("[FAIL]\tSP is not on sigaltstack\n"); - exit(EXIT_FAILURE); + ksft_exit_fail_msg("SP is not on sigaltstack\n"); } /* put some data on stack. other sighandler will try to overwrite it */ aa = alloca(1024); @@ -50,21 +51,22 @@ void my_usr1(int sig, siginfo_t *si, void *u) p = (struct stk_data *)(aa + 512); strcpy(p->msg, msg); p->flag = 1; - printf("[RUN]\tsignal USR1\n"); + ksft_print_msg("[RUN]\tsignal USR1\n"); err = sigaltstack(NULL, &stk); if (err) { - perror("[FAIL]\tsigaltstack()"); + ksft_exit_fail_msg("sigaltstack() - %s\n", strerror(errno)); exit(EXIT_FAILURE); } if (stk.ss_flags != SS_DISABLE) - printf("[FAIL]\tss_flags=%x, should be SS_DISABLE\n", + ksft_test_result_fail("tss_flags=%x, should be SS_DISABLE\n", stk.ss_flags); else - printf("[OK]\tsigaltstack is disabled in sighandler\n"); + ksft_test_result_pass( + "sigaltstack is disabled in sighandler\n"); swapcontext(&sc, &uc); - printf("%s\n", p->msg); + ksft_print_msg("%s\n", p->msg); if (!p->flag) { - printf("[RUN]\tAborting\n"); + ksft_exit_skip("[RUN]\tAborting\n"); exit(EXIT_FAILURE); } } @@ -74,13 +76,13 @@ void my_usr2(int sig, siginfo_t *si, void *u) char *aa; struct stk_data *p; - printf("[RUN]\tsignal USR2\n"); + ksft_print_msg("[RUN]\tsignal USR2\n"); aa = alloca(1024); /* dont run valgrind on this */ /* try to find the data stored by previous sighandler */ p = memmem(aa, 1024, msg, strlen(msg)); if (p) { - printf("[FAIL]\tsigaltstack re-used\n"); + ksft_test_result_fail("sigaltstack re-used\n"); /* corrupt the data */ strcpy(p->msg, msg2); /* tell other sighandler that his data is corrupted */ @@ -90,7 +92,7 @@ void my_usr2(int sig, siginfo_t *si, void *u) static void switch_fn(void) { - printf("[RUN]\tswitched to user ctx\n"); + ksft_print_msg("[RUN]\tswitched to user ctx\n"); raise(SIGUSR2); setcontext(&sc); } @@ -101,6 +103,8 @@ int main(void) stack_t stk; int err; + ksft_print_header(); + sigemptyset(&act.sa_mask); act.sa_flags = SA_ONSTACK | SA_SIGINFO; act.sa_sigaction = my_usr1; @@ -110,19 +114,20 @@ int main(void) sstack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); if (sstack == MAP_FAILED) { - perror("mmap()"); + ksft_exit_fail_msg("mmap() - %s\n", strerror(errno)); return EXIT_FAILURE; } err = sigaltstack(NULL, &stk); if (err) { - perror("[FAIL]\tsigaltstack()"); + ksft_exit_fail_msg("sigaltstack() - %s\n", strerror(errno)); exit(EXIT_FAILURE); } if (stk.ss_flags == SS_DISABLE) { - printf("[OK]\tInitial sigaltstack state was SS_DISABLE\n"); + ksft_test_result_pass( + "Initial sigaltstack state was SS_DISABLE\n"); } else { - printf("[FAIL]\tInitial sigaltstack state was %x; " + ksft_exit_fail_msg("Initial sigaltstack state was %x; " "should have been SS_DISABLE\n", stk.ss_flags); return EXIT_FAILURE; } @@ -133,7 +138,8 @@ int main(void) err = sigaltstack(&stk, NULL); if (err) { if (errno == EINVAL) { - printf("[NOTE]\tThe running kernel doesn't support SS_AUTODISARM\n"); + ksft_exit_skip( + "[NOTE]\tThe running kernel doesn't support SS_AUTODISARM\n"); /* * If test cases for the !SS_AUTODISARM variant were * added, we could still run them. We don't have any @@ -142,7 +148,9 @@ int main(void) */ return 0; } else { - perror("[FAIL]\tsigaltstack(SS_ONSTACK | SS_AUTODISARM)"); + ksft_exit_fail_msg( + "sigaltstack(SS_ONSTACK | SS_AUTODISARM) %s\n", + strerror(errno)); return EXIT_FAILURE; } } @@ -150,7 +158,7 @@ int main(void) ustack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); if (ustack == MAP_FAILED) { - perror("mmap()"); + ksft_exit_fail_msg("mmap() - %s\n", strerror(errno)); return EXIT_FAILURE; } getcontext(&uc); @@ -162,16 +170,17 @@ int main(void) err = sigaltstack(NULL, &stk); if (err) { - perror("[FAIL]\tsigaltstack()"); + ksft_exit_fail_msg("sigaltstack() - %s\n", strerror(errno)); exit(EXIT_FAILURE); } if (stk.ss_flags != SS_AUTODISARM) { - printf("[FAIL]\tss_flags=%x, should be SS_AUTODISARM\n", + ksft_exit_fail_msg("ss_flags=%x, should be SS_AUTODISARM\n", stk.ss_flags); exit(EXIT_FAILURE); } - printf("[OK]\tsigaltstack is still SS_AUTODISARM after signal\n"); + ksft_test_result_pass( + "sigaltstack is still SS_AUTODISARM after signal\n"); - printf("[OK]\tTest passed\n"); + ksft_exit_pass(); return 0; } diff --git a/tools/testing/selftests/splice/.gitignore b/tools/testing/selftests/splice/.gitignore new file mode 100644 index 000000000000..1e23fefd68e8 --- /dev/null +++ b/tools/testing/selftests/splice/.gitignore @@ -0,0 +1 @@ +default_file_splice_read diff --git a/tools/testing/selftests/splice/Makefile b/tools/testing/selftests/splice/Makefile index 9fc78e5e5451..7e1187e007fa 100644 --- a/tools/testing/selftests/splice/Makefile +++ b/tools/testing/selftests/splice/Makefile @@ -1,7 +1,4 @@ TEST_PROGS := default_file_splice_read.sh -EXTRA := default_file_splice_read -all: $(TEST_PROGS) $(EXTRA) +TEST_GEN_PROGS_EXTENDED := default_file_splice_read include ../lib.mk - -EXTRA_CLEAN := $(EXTRA) diff --git a/tools/testing/selftests/sync/sync_test.c b/tools/testing/selftests/sync/sync_test.c index 62fa666e501a..7f7938263c5c 100644 --- a/tools/testing/selftests/sync/sync_test.c +++ b/tools/testing/selftests/sync/sync_test.c @@ -31,62 +31,83 @@ #include <sys/types.h> #include <sys/stat.h> #include <sys/wait.h> +#include <errno.h> +#include <string.h> +#include "../kselftest.h" #include "synctest.h" static int run_test(int (*test)(void), char *name) { int result; pid_t childpid; + int ret; fflush(stdout); childpid = fork(); if (childpid) { waitpid(childpid, &result, 0); - if (WIFEXITED(result)) - return WEXITSTATUS(result); + if (WIFEXITED(result)) { + ret = WEXITSTATUS(result); + if (!ret) + ksft_test_result_pass("[RUN]\t%s\n", name); + else + ksft_test_result_fail("[RUN]\t%s\n", name); + return ret; + } return 1; } - printf("[RUN]\tExecuting %s\n", name); exit(test()); } -static int sync_api_supported(void) +static void sync_api_supported(void) { struct stat sbuf; + int ret; - return 0 == stat("/sys/kernel/debug/sync/sw_sync", &sbuf); + ret = stat("/sys/kernel/debug/sync/sw_sync", &sbuf); + if (!ret) + return; + + if (errno == ENOENT) + ksft_exit_skip("Sync framework not supported by kernel\n"); + + if (errno == EACCES) + ksft_exit_skip("Run Sync test as root.\n"); + + ksft_exit_fail_msg("stat failed on /sys/kernel/debug/sync/sw_sync: %s", + strerror(errno)); } int main(void) { - int err = 0; + int err; - if (!sync_api_supported()) { - printf("SKIP: Sync framework not supported by kernel\n"); - return 0; - } + ksft_print_header(); + + sync_api_supported(); - printf("[RUN]\tTesting sync framework\n"); + ksft_print_msg("[RUN]\tTesting sync framework\n"); - err += RUN_TEST(test_alloc_timeline); - err += RUN_TEST(test_alloc_fence); - err += RUN_TEST(test_alloc_fence_negative); + RUN_TEST(test_alloc_timeline); + RUN_TEST(test_alloc_fence); + RUN_TEST(test_alloc_fence_negative); - err += RUN_TEST(test_fence_one_timeline_wait); - err += RUN_TEST(test_fence_one_timeline_merge); - err += RUN_TEST(test_fence_merge_same_fence); - err += RUN_TEST(test_fence_multi_timeline_wait); - err += RUN_TEST(test_stress_two_threads_shared_timeline); - err += RUN_TEST(test_consumer_stress_multi_producer_single_consumer); - err += RUN_TEST(test_merge_stress_random_merge); + RUN_TEST(test_fence_one_timeline_wait); + RUN_TEST(test_fence_one_timeline_merge); + RUN_TEST(test_fence_merge_same_fence); + RUN_TEST(test_fence_multi_timeline_wait); + RUN_TEST(test_stress_two_threads_shared_timeline); + RUN_TEST(test_consumer_stress_multi_producer_single_consumer); + RUN_TEST(test_merge_stress_random_merge); + err = ksft_get_fail_cnt(); if (err) - printf("[FAIL]\tsync errors: %d\n", err); - else - printf("[OK]\tsync\n"); + ksft_exit_fail_msg("%d out of %d sync tests failed\n", + err, ksft_test_num()); - return !!err; + /* need this return to keep gcc happy */ + return ksft_exit_pass(); } diff --git a/tools/testing/selftests/sync/synctest.h b/tools/testing/selftests/sync/synctest.h index e7d1d57dba7a..90a8e5369914 100644 --- a/tools/testing/selftests/sync/synctest.h +++ b/tools/testing/selftests/sync/synctest.h @@ -29,10 +29,11 @@ #define SELFTESTS_SYNCTEST_H #include <stdio.h> +#include "../kselftest.h" #define ASSERT(cond, msg) do { \ if (!(cond)) { \ - printf("[ERROR]\t%s", (msg)); \ + ksft_print_msg("[ERROR]\t%s", (msg)); \ return 1; \ } \ } while (0) diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile index a9b86133b9b3..ae4593115408 100644 --- a/tools/testing/selftests/timers/Makefile +++ b/tools/testing/selftests/timers/Makefile @@ -1,5 +1,4 @@ -BUILD_FLAGS = -DKTEST -CFLAGS += -O3 -Wl,-no-as-needed -Wall $(BUILD_FLAGS) +CFLAGS += -O3 -Wl,-no-as-needed -Wall LDFLAGS += -lrt -lpthread -lm # these are all "safe" tests that don't modify @@ -7,9 +6,11 @@ LDFLAGS += -lrt -lpthread -lm TEST_GEN_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \ inconsistency-check raw_skew threadtest rtctest -TEST_GEN_PROGS_EXTENDED = alarmtimer-suspend valid-adjtimex adjtick change_skew \ +DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \ skew_consistency clocksource-switch freq-step leap-a-day \ - leapcrash set-tai set-2038 set-tz rtctest_setdate + leapcrash set-tai set-2038 set-tz + +TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS) rtctest_setdate include ../lib.mk @@ -18,16 +19,4 @@ include ../lib.mk # and may modify the system time or trigger # other behavior like suspend run_destructive_tests: run_tests - ./alarmtimer-suspend - ./valid-adjtimex - ./adjtick - ./change_skew - ./skew_consistency - ./clocksource-switch - ./freq-step - ./leap-a-day -s -i 10 - ./leapcrash - ./set-tz - ./set-tai - ./set-2038 - + $(call RUN_TESTS, $(DESTRUCTIVE_TESTS)) diff --git a/tools/testing/selftests/timers/adjtick.c b/tools/testing/selftests/timers/adjtick.c index 9887fd538fec..0caca3a06bd2 100644 --- a/tools/testing/selftests/timers/adjtick.c +++ b/tools/testing/selftests/timers/adjtick.c @@ -23,18 +23,7 @@ #include <sys/timex.h> #include <time.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define CLOCK_MONOTONIC_RAW 4 diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c index 2b361b830395..4da09dbf83ba 100644 --- a/tools/testing/selftests/timers/alarmtimer-suspend.c +++ b/tools/testing/selftests/timers/alarmtimer-suspend.c @@ -28,18 +28,7 @@ #include <signal.h> #include <stdlib.h> #include <pthread.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define CLOCK_REALTIME 0 #define CLOCK_MONOTONIC 1 diff --git a/tools/testing/selftests/timers/change_skew.c b/tools/testing/selftests/timers/change_skew.c index cb1968977c04..c4eab7124990 100644 --- a/tools/testing/selftests/timers/change_skew.c +++ b/tools/testing/selftests/timers/change_skew.c @@ -28,18 +28,7 @@ #include <sys/time.h> #include <sys/timex.h> #include <time.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define NSEC_PER_SEC 1000000000LL diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c index 5ff165373f8b..bfc974b4572d 100644 --- a/tools/testing/selftests/timers/clocksource-switch.c +++ b/tools/testing/selftests/timers/clocksource-switch.c @@ -34,18 +34,7 @@ #include <fcntl.h> #include <string.h> #include <sys/wait.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif int get_clocksources(char list[][30]) @@ -61,7 +50,7 @@ int get_clocksources(char list[][30]) close(fd); - for (i = 0; i < 30; i++) + for (i = 0; i < 10; i++) list[i][0] = '\0'; head = buf; diff --git a/tools/testing/selftests/timers/inconsistency-check.c b/tools/testing/selftests/timers/inconsistency-check.c index 74c60e8759a0..022d3ffe3fbf 100644 --- a/tools/testing/selftests/timers/inconsistency-check.c +++ b/tools/testing/selftests/timers/inconsistency-check.c @@ -28,18 +28,7 @@ #include <sys/timex.h> #include <string.h> #include <signal.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define CALLS_PER_LOOP 64 #define NSEC_PER_SEC 1000000000ULL diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c index fb46ad6ac92c..19e46ed5dfb5 100644 --- a/tools/testing/selftests/timers/leap-a-day.c +++ b/tools/testing/selftests/timers/leap-a-day.c @@ -48,18 +48,7 @@ #include <string.h> #include <signal.h> #include <unistd.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define NSEC_PER_SEC 1000000000ULL #define CLOCK_TAI 11 @@ -190,18 +179,18 @@ int main(int argc, char **argv) struct sigevent se; struct sigaction act; int signum = SIGRTMAX; - int settime = 0; + int settime = 1; int tai_time = 0; int insert = 1; - int iterations = -1; + int iterations = 10; int opt; /* Process arguments */ while ((opt = getopt(argc, argv, "sti:")) != -1) { switch (opt) { - case 's': - printf("Setting time to speed up testing\n"); - settime = 1; + case 'w': + printf("Only setting leap-flag, not changing time. It could take up to a day for leap to trigger.\n"); + settime = 0; break; case 'i': iterations = atoi(optarg); @@ -210,9 +199,10 @@ int main(int argc, char **argv) tai_time = 1; break; default: - printf("Usage: %s [-s] [-i <iterations>]\n", argv[0]); - printf(" -s: Set time to right before leap second each iteration\n"); - printf(" -i: Number of iterations\n"); + printf("Usage: %s [-w] [-i <iterations>]\n", argv[0]); + printf(" -w: Set flag and wait for leap second each iteration"); + printf(" (default sets time to right before leapsecond)\n"); + printf(" -i: Number of iterations (-1 = infinite, default is 10)\n"); printf(" -t: Print TAI time\n"); exit(-1); } diff --git a/tools/testing/selftests/timers/leapcrash.c b/tools/testing/selftests/timers/leapcrash.c index a1071bdbdeb7..830c462f605d 100644 --- a/tools/testing/selftests/timers/leapcrash.c +++ b/tools/testing/selftests/timers/leapcrash.c @@ -22,20 +22,7 @@ #include <sys/timex.h> #include <string.h> #include <signal.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif - - /* clear NTP time_status & time_state */ int clear_time_state(void) diff --git a/tools/testing/selftests/timers/mqueue-lat.c b/tools/testing/selftests/timers/mqueue-lat.c index a2a3924d0b41..1867db5d6f5e 100644 --- a/tools/testing/selftests/timers/mqueue-lat.c +++ b/tools/testing/selftests/timers/mqueue-lat.c @@ -29,18 +29,7 @@ #include <signal.h> #include <errno.h> #include <mqueue.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define NSEC_PER_SEC 1000000000ULL diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c index ff942ff7c9b3..8adb0bb51d4d 100644 --- a/tools/testing/selftests/timers/nanosleep.c +++ b/tools/testing/selftests/timers/nanosleep.c @@ -27,18 +27,7 @@ #include <sys/timex.h> #include <string.h> #include <signal.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define NSEC_PER_SEC 1000000000ULL diff --git a/tools/testing/selftests/timers/nsleep-lat.c b/tools/testing/selftests/timers/nsleep-lat.c index 2d7898fda0f1..c3c3dc10db17 100644 --- a/tools/testing/selftests/timers/nsleep-lat.c +++ b/tools/testing/selftests/timers/nsleep-lat.c @@ -24,18 +24,7 @@ #include <sys/timex.h> #include <string.h> #include <signal.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define NSEC_PER_SEC 1000000000ULL diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c index 30906bfd9c1b..ca6cd146aafe 100644 --- a/tools/testing/selftests/timers/raw_skew.c +++ b/tools/testing/selftests/timers/raw_skew.c @@ -25,19 +25,7 @@ #include <sys/time.h> #include <sys/timex.h> #include <time.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif - #define CLOCK_MONOTONIC_RAW 4 #define NSEC_PER_SEC 1000000000LL diff --git a/tools/testing/selftests/timers/rtctest.c b/tools/testing/selftests/timers/rtctest.c index f61170f7b024..411eff625e66 100644 --- a/tools/testing/selftests/timers/rtctest.c +++ b/tools/testing/selftests/timers/rtctest.c @@ -221,6 +221,11 @@ test_READ: /* Read the current alarm settings */ retval = ioctl(fd, RTC_ALM_READ, &rtc_tm); if (retval == -1) { + if (errno == EINVAL) { + fprintf(stderr, + "\n...EINVAL reading current alarm setting.\n"); + goto test_PIE; + } perror("RTC_ALM_READ ioctl"); exit(errno); } @@ -231,7 +236,7 @@ test_READ: /* Enable alarm interrupts */ retval = ioctl(fd, RTC_AIE_ON, 0); if (retval == -1) { - if (errno == EINVAL) { + if (errno == EINVAL || errno == EIO) { fprintf(stderr, "\n...Alarm IRQs not supported.\n"); goto test_PIE; diff --git a/tools/testing/selftests/timers/set-2038.c b/tools/testing/selftests/timers/set-2038.c index c8a7e14446b1..688cfd81b531 100644 --- a/tools/testing/selftests/timers/set-2038.c +++ b/tools/testing/selftests/timers/set-2038.c @@ -27,18 +27,7 @@ #include <unistd.h> #include <time.h> #include <sys/time.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define NSEC_PER_SEC 1000000000LL diff --git a/tools/testing/selftests/timers/set-tai.c b/tools/testing/selftests/timers/set-tai.c index dc88dbc8831f..70fed27d8fd3 100644 --- a/tools/testing/selftests/timers/set-tai.c +++ b/tools/testing/selftests/timers/set-tai.c @@ -23,18 +23,7 @@ #include <string.h> #include <signal.h> #include <unistd.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif int set_tai(int offset) { diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c index 15434da23b04..9c92b7bd5641 100644 --- a/tools/testing/selftests/timers/set-timer-lat.c +++ b/tools/testing/selftests/timers/set-timer-lat.c @@ -28,18 +28,7 @@ #include <signal.h> #include <stdlib.h> #include <pthread.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define CLOCK_REALTIME 0 #define CLOCK_MONOTONIC 1 diff --git a/tools/testing/selftests/timers/set-tz.c b/tools/testing/selftests/timers/set-tz.c index f4184928b16b..877fd5532fee 100644 --- a/tools/testing/selftests/timers/set-tz.c +++ b/tools/testing/selftests/timers/set-tz.c @@ -23,18 +23,7 @@ #include <string.h> #include <signal.h> #include <unistd.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif int set_tz(int min, int dst) { diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c index 2a996e072259..022b711c78ee 100644 --- a/tools/testing/selftests/timers/skew_consistency.c +++ b/tools/testing/selftests/timers/skew_consistency.c @@ -35,18 +35,7 @@ #include <stdlib.h> #include <string.h> #include <sys/wait.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define NSEC_PER_SEC 1000000000LL diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c index e632e116f05e..759c9c06f1a0 100644 --- a/tools/testing/selftests/timers/threadtest.c +++ b/tools/testing/selftests/timers/threadtest.c @@ -21,19 +21,7 @@ #include <stdlib.h> #include <sys/time.h> #include <pthread.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif - /* serializes shared list access */ pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER; diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c index 60fe3c569bd9..d9d3ab93b31a 100644 --- a/tools/testing/selftests/timers/valid-adjtimex.c +++ b/tools/testing/selftests/timers/valid-adjtimex.c @@ -32,18 +32,7 @@ #include <string.h> #include <signal.h> #include <unistd.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define NSEC_PER_SEC 1000000000LL #define USEC_PER_SEC 1000000LL diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c index a74c9d739d07..a1391be2dc1e 100644 --- a/tools/testing/selftests/watchdog/watchdog-test.c +++ b/tools/testing/selftests/watchdog/watchdog-test.c @@ -9,12 +9,25 @@ #include <unistd.h> #include <fcntl.h> #include <signal.h> +#include <getopt.h> #include <sys/ioctl.h> #include <linux/types.h> #include <linux/watchdog.h> +#define DEFAULT_PING_RATE 1 + int fd; const char v = 'V'; +static const char sopts[] = "bdehp:t:"; +static const struct option lopts[] = { + {"bootstatus", no_argument, NULL, 'b'}, + {"disable", no_argument, NULL, 'd'}, + {"enable", no_argument, NULL, 'e'}, + {"help", no_argument, NULL, 'h'}, + {"pingrate", required_argument, NULL, 'p'}, + {"timeout", required_argument, NULL, 't'}, + {NULL, no_argument, NULL, 0x0} +}; /* * This function simply sends an IOCTL to the driver, which in turn ticks @@ -23,12 +36,12 @@ const char v = 'V'; */ static void keep_alive(void) { - int dummy; - int ret; + int dummy; + int ret; - ret = ioctl(fd, WDIOC_KEEPALIVE, &dummy); - if (!ret) - printf("."); + ret = ioctl(fd, WDIOC_KEEPALIVE, &dummy); + if (!ret) + printf("."); } /* @@ -38,75 +51,110 @@ static void keep_alive(void) static void term(int sig) { - int ret = write(fd, &v, 1); - - close(fd); - if (ret < 0) - printf("\nStopping watchdog ticks failed (%d)...\n", errno); - else - printf("\nStopping watchdog ticks...\n"); - exit(0); + int ret = write(fd, &v, 1); + + close(fd); + if (ret < 0) + printf("\nStopping watchdog ticks failed (%d)...\n", errno); + else + printf("\nStopping watchdog ticks...\n"); + exit(0); +} + +static void usage(char *progname) +{ + printf("Usage: %s [options]\n", progname); + printf(" -b, --bootstatus Get last boot status (Watchdog/POR)\n"); + printf(" -d, --disable Turn off the watchdog timer\n"); + printf(" -e, --enable Turn on the watchdog timer\n"); + printf(" -h, --help Print the help message\n"); + printf(" -p, --pingrate=P Set ping rate to P seconds (default %d)\n", DEFAULT_PING_RATE); + printf(" -t, --timeout=T Set timeout to T seconds\n"); + printf("\n"); + printf("Parameters are parsed left-to-right in real-time.\n"); + printf("Example: %s -d -t 10 -p 5 -e\n", progname); } int main(int argc, char *argv[]) { - int flags; - unsigned int ping_rate = 1; - int ret; - int i; - - setbuf(stdout, NULL); - - fd = open("/dev/watchdog", O_WRONLY); - - if (fd == -1) { - printf("Watchdog device not enabled.\n"); - exit(-1); - } - - for (i = 1; i < argc; i++) { - if (!strncasecmp(argv[i], "-d", 2)) { - flags = WDIOS_DISABLECARD; - ret = ioctl(fd, WDIOC_SETOPTIONS, &flags); - if (!ret) - printf("Watchdog card disabled.\n"); - } else if (!strncasecmp(argv[i], "-e", 2)) { - flags = WDIOS_ENABLECARD; - ret = ioctl(fd, WDIOC_SETOPTIONS, &flags); - if (!ret) - printf("Watchdog card enabled.\n"); - } else if (!strncasecmp(argv[i], "-t", 2) && argv[2]) { - flags = atoi(argv[i + 1]); - ret = ioctl(fd, WDIOC_SETTIMEOUT, &flags); - if (!ret) - printf("Watchdog timeout set to %u seconds.\n", flags); - i++; - } else if (!strncasecmp(argv[i], "-p", 2) && argv[2]) { - ping_rate = strtoul(argv[i + 1], NULL, 0); - printf("Watchdog ping rate set to %u seconds.\n", ping_rate); - i++; - } else { - printf("-d to disable, -e to enable, -t <n> to set " - "the timeout,\n-p <n> to set the ping rate, and "); - printf("run by itself to tick the card.\n"); - printf("Parameters are parsed left-to-right in real-time.\n"); - printf("Example: %s -d -t 10 -p 5 -e\n", argv[0]); - goto end; - } - } - - printf("Watchdog Ticking Away!\n"); - - signal(SIGINT, term); - - while(1) { - keep_alive(); - sleep(ping_rate); - } + int flags; + unsigned int ping_rate = DEFAULT_PING_RATE; + int ret; + int c; + int oneshot = 0; + + setbuf(stdout, NULL); + + fd = open("/dev/watchdog", O_WRONLY); + + if (fd == -1) { + printf("Watchdog device not enabled.\n"); + exit(-1); + } + + while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) { + switch (c) { + case 'b': + flags = 0; + oneshot = 1; + ret = ioctl(fd, WDIOC_GETBOOTSTATUS, &flags); + if (!ret) + printf("Last boot is caused by: %s.\n", (flags != 0) ? + "Watchdog" : "Power-On-Reset"); + else + printf("WDIOC_GETBOOTSTATUS errno '%s'\n", strerror(errno)); + break; + case 'd': + flags = WDIOS_DISABLECARD; + ret = ioctl(fd, WDIOC_SETOPTIONS, &flags); + if (!ret) + printf("Watchdog card disabled.\n"); + else + printf("WDIOS_DISABLECARD errno '%s'\n", strerror(errno)); + break; + case 'e': + flags = WDIOS_ENABLECARD; + ret = ioctl(fd, WDIOC_SETOPTIONS, &flags); + if (!ret) + printf("Watchdog card enabled.\n"); + else + printf("WDIOS_ENABLECARD errno '%s'\n", strerror(errno)); + break; + case 'p': + ping_rate = strtoul(optarg, NULL, 0); + if (!ping_rate) + ping_rate = DEFAULT_PING_RATE; + printf("Watchdog ping rate set to %u seconds.\n", ping_rate); + break; + case 't': + flags = strtoul(optarg, NULL, 0); + ret = ioctl(fd, WDIOC_SETTIMEOUT, &flags); + if (!ret) + printf("Watchdog timeout set to %u seconds.\n", flags); + else + printf("WDIOC_SETTIMEOUT errno '%s'\n", strerror(errno)); + break; + default: + usage(argv[0]); + goto end; + } + } + + if (oneshot) + goto end; + + printf("Watchdog Ticking Away!\n"); + + signal(SIGINT, term); + + while (1) { + keep_alive(); + sleep(ping_rate); + } end: - ret = write(fd, &v, 1); - if (ret < 0) - printf("Stopping watchdog ticks failed (%d)...\n", errno); - close(fd); - return 0; + ret = write(fd, &v, 1); + if (ret < 0) + printf("Stopping watchdog ticks failed (%d)...\n", errno); + close(fd); + return 0; } diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index a39a1e161e63..b9f68e4add71 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -416,6 +416,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) && !v->arch.power_off && !v->arch.pause); } +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) +{ + return vcpu_mode_priv(vcpu); +} + /* Just ensure a guest exit from a particular CPU */ static void exit_vm_noop(void *info) { diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 2ea21dac0b44..b36945d49986 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1454,25 +1454,6 @@ out: kvm_set_pfn_accessed(pfn); } -static bool is_abort_sea(unsigned long fault_status) -{ - switch (fault_status) { - case FSC_SEA: - case FSC_SEA_TTW0: - case FSC_SEA_TTW1: - case FSC_SEA_TTW2: - case FSC_SEA_TTW3: - case FSC_SECC: - case FSC_SECC_TTW0: - case FSC_SECC_TTW1: - case FSC_SECC_TTW2: - case FSC_SECC_TTW3: - return true; - default: - return false; - } -} - /** * kvm_handle_guest_abort - handles all 2nd stage aborts * @vcpu: the VCPU pointer @@ -1498,20 +1479,21 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) fault_status = kvm_vcpu_trap_get_fault_type(vcpu); fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); + is_iabt = kvm_vcpu_trap_is_iabt(vcpu); - /* - * The host kernel will handle the synchronous external abort. There - * is no need to pass the error into the guest. - */ - if (is_abort_sea(fault_status)) { + /* Synchronous External Abort? */ + if (kvm_vcpu_dabt_isextabt(vcpu)) { + /* + * For RAS the host kernel may handle this abort. + * There is no need to pass the error into the guest. + */ if (!handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu))) return 1; - } - is_iabt = kvm_vcpu_trap_is_iabt(vcpu); - if (unlikely(!is_iabt && kvm_vcpu_dabt_isextabt(vcpu))) { - kvm_inject_vabt(vcpu); - return 1; + if (unlikely(!is_iabt)) { + kvm_inject_vabt(vcpu); + return 1; + } } trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu), diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/virt/kvm/arm/vgic/vgic-debug.c index 7072ab743332..10b38178cff2 100644 --- a/virt/kvm/arm/vgic/vgic-debug.c +++ b/virt/kvm/arm/vgic/vgic-debug.c @@ -234,7 +234,7 @@ static int vgic_debug_show(struct seq_file *s, void *v) return 0; } -static struct seq_operations vgic_debug_seq_ops = { +static const struct seq_operations vgic_debug_seq_ops = { .start = vgic_debug_start, .next = vgic_debug_next, .stop = vgic_debug_stop, @@ -255,7 +255,7 @@ static int debug_open(struct inode *inode, struct file *file) return ret; }; -static struct file_operations vgic_debug_fops = { +static const struct file_operations vgic_debug_fops = { .owner = THIS_MODULE, .open = debug_open, .read = seq_read, diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index aa6b68db80b4..f51c1e1b3f70 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -144,7 +144,6 @@ struct its_ite { struct vgic_irq *irq; struct its_collection *collection; - u32 lpi; u32 event_id; }; @@ -813,7 +812,7 @@ static void vgic_its_free_collection(struct vgic_its *its, u32 coll_id) /* Must be called with its_lock mutex held */ static struct its_ite *vgic_its_alloc_ite(struct its_device *device, struct its_collection *collection, - u32 lpi_id, u32 event_id) + u32 event_id) { struct its_ite *ite; @@ -823,7 +822,6 @@ static struct its_ite *vgic_its_alloc_ite(struct its_device *device, ite->event_id = event_id; ite->collection = collection; - ite->lpi = lpi_id; list_add_tail(&ite->ite_list, &device->itt_head); return ite; @@ -873,7 +871,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, new_coll = collection; } - ite = vgic_its_alloc_ite(device, collection, lpi_nr, event_id); + ite = vgic_its_alloc_ite(device, collection, event_id); if (IS_ERR(ite)) { if (new_coll) vgic_its_free_collection(its, coll_id); @@ -1848,7 +1846,7 @@ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, next_offset = compute_next_eventid_offset(&dev->itt_head, ite); val = ((u64)next_offset << KVM_ITS_ITE_NEXT_SHIFT) | - ((u64)ite->lpi << KVM_ITS_ITE_PINTID_SHIFT) | + ((u64)ite->irq->intid << KVM_ITS_ITE_PINTID_SHIFT) | ite->collection->collection_id; val = cpu_to_le64(val); return kvm_write_guest(kvm, gpa, &val, ite_esz); @@ -1895,7 +1893,7 @@ static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id, if (!collection) return -EINVAL; - ite = vgic_its_alloc_ite(dev, collection, lpi_id, event_id); + ite = vgic_its_alloc_ite(dev, collection, event_id); if (IS_ERR(ite)) return PTR_ERR(ite); diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index 37522e65eb53..b3d4a10f09a1 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -303,6 +303,51 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu, vgic_set_vmcr(vcpu, &vmcr); } +static unsigned long vgic_mmio_read_apr(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + int n; /* which APRn is this */ + + n = (addr >> 2) & 0x3; + + if (kvm_vgic_global_state.type == VGIC_V2) { + /* GICv2 hardware systems support max. 32 groups */ + if (n != 0) + return 0; + return vcpu->arch.vgic_cpu.vgic_v2.vgic_apr; + } else { + struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3; + + if (n > vgic_v3_max_apr_idx(vcpu)) + return 0; + /* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */ + return vgicv3->vgic_ap1r[n]; + } +} + +static void vgic_mmio_write_apr(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + int n; /* which APRn is this */ + + n = (addr >> 2) & 0x3; + + if (kvm_vgic_global_state.type == VGIC_V2) { + /* GICv2 hardware systems support max. 32 groups */ + if (n != 0) + return; + vcpu->arch.vgic_cpu.vgic_v2.vgic_apr = val; + } else { + struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3; + + if (n > vgic_v3_max_apr_idx(vcpu)) + return; + /* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */ + vgicv3->vgic_ap1r[n] = val; + } +} + static const struct vgic_register_region vgic_v2_dist_registers[] = { REGISTER_DESC_WITH_LENGTH(GIC_DIST_CTRL, vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12, @@ -364,7 +409,7 @@ static const struct vgic_register_region vgic_v2_cpu_registers[] = { vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GIC_CPU_ACTIVEPRIO, - vgic_mmio_read_raz, vgic_mmio_write_wi, 16, + vgic_mmio_read_apr, vgic_mmio_write_apr, 16, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GIC_CPU_IDENT, vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index bba7fa22a7f7..bf9ceab67c77 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -220,4 +220,20 @@ int vgic_debug_destroy(struct kvm *kvm); bool lock_all_vcpus(struct kvm *kvm); void unlock_all_vcpus(struct kvm *kvm); +static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu; + + /* + * num_pri_bits are initialized with HW supported values. + * We can rely safely on num_pri_bits even if VM has not + * restored ICC_CTLR_EL1 before restoring APnR registers. + */ + switch (cpu_if->num_pri_bits) { + case 7: return 3; + case 6: return 1; + default: return 0; + } +} + #endif diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4d81f6ded88e..6ed1c2021198 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1609,7 +1609,7 @@ int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn, struct page **pages, int nr_pages) { unsigned long addr; - gfn_t entry; + gfn_t entry = 0; addr = gfn_to_hva_many(slot, gfn, &entry); if (kvm_is_error_hva(addr)) @@ -1928,6 +1928,7 @@ static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots, * verify that the entire region is valid here. */ while (start_gfn <= end_gfn) { + nr_pages_avail = 0; ghc->memslot = __gfn_to_memslot(slots, start_gfn); ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, &nr_pages_avail); @@ -2275,7 +2276,7 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) #endif } -void kvm_vcpu_on_spin(struct kvm_vcpu *me) +void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) { struct kvm *kvm = me->kvm; struct kvm_vcpu *vcpu; @@ -2306,6 +2307,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) continue; if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu)) continue; + if (yield_to_kernel_mode && !kvm_arch_vcpu_in_kernel(vcpu)) + continue; if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) continue; |