diff options
821 files changed, 11346 insertions, 6401 deletions
@@ -2808,8 +2808,7 @@ S: Ottawa, Ontario S: Canada K2P 0X8 N: Mikael Pettersson -E: mikpe@it.uu.se -W: http://user.it.uu.se/~mikpe/linux/ +E: mikpelinux@gmail.com D: Miscellaneous fixes N: Reed H. Petty diff --git a/Documentation/ABI/stable/sysfs-bus-usb b/Documentation/ABI/stable/sysfs-bus-usb index 2be603c52a24..a6b685724740 100644 --- a/Documentation/ABI/stable/sysfs-bus-usb +++ b/Documentation/ABI/stable/sysfs-bus-usb @@ -37,8 +37,8 @@ Description: that the USB device has been connected to the machine. This file is read-only. Users: - PowerTOP <power@bughost.org> - http://www.lesswatts.org/projects/powertop/ + PowerTOP <powertop@lists.01.org> + https://01.org/powertop/ What: /sys/bus/usb/device/.../power/active_duration Date: January 2008 @@ -57,8 +57,8 @@ Description: will give an integer percentage. Note that this does not account for counter wrap. Users: - PowerTOP <power@bughost.org> - http://www.lesswatts.org/projects/powertop/ + PowerTOP <powertop@lists.01.org> + https://01.org/powertop/ What: /sys/bus/usb/devices/<busnum>-<port[.port]>...:<config num>-<interface num>/supports_autosuspend Date: January 2008 diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power index 9d43e7670841..efe449bdf811 100644 --- a/Documentation/ABI/testing/sysfs-devices-power +++ b/Documentation/ABI/testing/sysfs-devices-power @@ -1,6 +1,6 @@ What: /sys/devices/.../power/ Date: January 2009 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/devices/.../power directory contains attributes allowing the user space to check and modify some power @@ -8,7 +8,7 @@ Description: What: /sys/devices/.../power/wakeup Date: January 2009 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/devices/.../power/wakeup attribute allows the user space to check if the device is enabled to wake up the system @@ -34,7 +34,7 @@ Description: What: /sys/devices/.../power/control Date: January 2009 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/devices/.../power/control attribute allows the user space to control the run-time power management of the device. @@ -53,7 +53,7 @@ Description: What: /sys/devices/.../power/async Date: January 2009 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/devices/.../async attribute allows the user space to enable or diasble the device's suspend and resume callbacks to @@ -79,7 +79,7 @@ Description: What: /sys/devices/.../power/wakeup_count Date: September 2010 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/devices/.../wakeup_count attribute contains the number of signaled wakeup events associated with the device. This @@ -88,7 +88,7 @@ Description: What: /sys/devices/.../power/wakeup_active_count Date: September 2010 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/devices/.../wakeup_active_count attribute contains the number of times the processing of wakeup events associated with @@ -98,7 +98,7 @@ Description: What: /sys/devices/.../power/wakeup_abort_count Date: February 2012 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/devices/.../wakeup_abort_count attribute contains the number of times the processing of a wakeup event associated with @@ -109,7 +109,7 @@ Description: What: /sys/devices/.../power/wakeup_expire_count Date: February 2012 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/devices/.../wakeup_expire_count attribute contains the number of times a wakeup event associated with the device has @@ -119,7 +119,7 @@ Description: What: /sys/devices/.../power/wakeup_active Date: September 2010 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/devices/.../wakeup_active attribute contains either 1, or 0, depending on whether or not a wakeup event associated with @@ -129,7 +129,7 @@ Description: What: /sys/devices/.../power/wakeup_total_time_ms Date: September 2010 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/devices/.../wakeup_total_time_ms attribute contains the total time of processing wakeup events associated with the @@ -139,7 +139,7 @@ Description: What: /sys/devices/.../power/wakeup_max_time_ms Date: September 2010 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/devices/.../wakeup_max_time_ms attribute contains the maximum time of processing a single wakeup event associated @@ -149,7 +149,7 @@ Description: What: /sys/devices/.../power/wakeup_last_time_ms Date: September 2010 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/devices/.../wakeup_last_time_ms attribute contains the value of the monotonic clock corresponding to the time of @@ -160,7 +160,7 @@ Description: What: /sys/devices/.../power/wakeup_prevent_sleep_time_ms Date: February 2012 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/devices/.../wakeup_prevent_sleep_time_ms attribute contains the total time the device has been preventing @@ -189,7 +189,7 @@ Description: What: /sys/devices/.../power/pm_qos_latency_us Date: March 2012 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/devices/.../power/pm_qos_resume_latency_us attribute contains the PM QoS resume latency limit for the given device, @@ -207,7 +207,7 @@ Description: What: /sys/devices/.../power/pm_qos_no_power_off Date: September 2012 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/devices/.../power/pm_qos_no_power_off attribute is used for manipulating the PM QoS "no power off" flag. If @@ -222,7 +222,7 @@ Description: What: /sys/devices/.../power/pm_qos_remote_wakeup Date: September 2012 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/devices/.../power/pm_qos_remote_wakeup attribute is used for manipulating the PM QoS "remote wakeup required" diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index 217772615d02..205a73878441 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -1,6 +1,6 @@ What: /sys/power/ Date: August 2006 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/power directory will contain files that will provide a unified interface to the power management @@ -8,7 +8,7 @@ Description: What: /sys/power/state Date: August 2006 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/power/state file controls the system power state. Reading from this file returns what states are supported, @@ -22,7 +22,7 @@ Description: What: /sys/power/disk Date: September 2006 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/power/disk file controls the operating mode of the suspend-to-disk mechanism. Reading from this file returns @@ -67,7 +67,7 @@ Description: What: /sys/power/image_size Date: August 2006 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/power/image_size file controls the size of the image created by the suspend-to-disk mechanism. It can be written a @@ -84,7 +84,7 @@ Description: What: /sys/power/pm_trace Date: August 2006 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/power/pm_trace file controls the code which saves the last PM event point in the RTC across reboots, so that you can @@ -133,7 +133,7 @@ Description: What: /sys/power/pm_async Date: January 2009 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/power/pm_async file controls the switch allowing the user space to enable or disable asynchronous suspend and resume @@ -146,7 +146,7 @@ Description: What: /sys/power/wakeup_count Date: July 2010 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/power/wakeup_count file allows user space to put the system into a sleep state while taking into account the @@ -161,7 +161,7 @@ Description: What: /sys/power/reserved_size Date: May 2011 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/power/reserved_size file allows user space to control the amount of memory reserved for allocations made by device @@ -175,7 +175,7 @@ Description: What: /sys/power/autosleep Date: April 2012 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/power/autosleep file can be written one of the strings returned by reads from /sys/power/state. If that happens, a @@ -192,7 +192,7 @@ Description: What: /sys/power/wake_lock Date: February 2012 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/power/wake_lock file allows user space to create wakeup source objects and activate them on demand (if one of @@ -219,7 +219,7 @@ Description: What: /sys/power/wake_unlock Date: February 2012 -Contact: Rafael J. Wysocki <rjw@sisk.pl> +Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/power/wake_unlock file allows user space to deactivate wakeup sources created with the help of /sys/power/wake_lock. diff --git a/Documentation/acpi/dsdt-override.txt b/Documentation/acpi/dsdt-override.txt index febbb1ba4d23..784841caa6e6 100644 --- a/Documentation/acpi/dsdt-override.txt +++ b/Documentation/acpi/dsdt-override.txt @@ -4,4 +4,4 @@ CONFIG_ACPI_CUSTOM_DSDT builds the image into the kernel. When to use this method is described in detail on the Linux/ACPI home page: -http://www.lesswatts.org/projects/acpi/overridingDSDT.php +https://01.org/linux-acpi/documentation/overriding-dsdt diff --git a/Documentation/block/00-INDEX b/Documentation/block/00-INDEX index d18ecd827c40..929d9904f74b 100644 --- a/Documentation/block/00-INDEX +++ b/Documentation/block/00-INDEX @@ -6,6 +6,8 @@ capability.txt - Generic Block Device Capability (/sys/block/<device>/capability) cfq-iosched.txt - CFQ IO scheduler tunables +cmdline-partition.txt + - how to specify block device partitions on kernel command line data-integrity.txt - Block data integrity deadline-iosched.txt diff --git a/Documentation/block/cmdline-partition.txt b/Documentation/block/cmdline-partition.txt index 2bbf4cc40c3f..525b9f6d7fb4 100644 --- a/Documentation/block/cmdline-partition.txt +++ b/Documentation/block/cmdline-partition.txt @@ -1,9 +1,9 @@ -Embedded device command line partition +Embedded device command line partition parsing ===================================================================== -Read block device partition table from command line. -The partition used for fixed block device (eMMC) embedded device. -It is no MBR, save storage space. Bootloader can be easily accessed +Support for reading the block device partition table from the command line. +It is typically used for fixed block (eMMC) embedded devices. +It has no MBR, so saves storage space. Bootloader can be easily accessed by absolute address of data on the block device. Users can easily change the partition. diff --git a/Documentation/devicetree/bindings/memory.txt b/Documentation/devicetree/bindings/memory.txt deleted file mode 100644 index eb2469365593..000000000000 --- a/Documentation/devicetree/bindings/memory.txt +++ /dev/null @@ -1,168 +0,0 @@ -*** Memory binding *** - -The /memory node provides basic information about the address and size -of the physical memory. This node is usually filled or updated by the -bootloader, depending on the actual memory configuration of the given -hardware. - -The memory layout is described by the following node: - -/ { - #address-cells = <(n)>; - #size-cells = <(m)>; - memory { - device_type = "memory"; - reg = <(baseaddr1) (size1) - (baseaddr2) (size2) - ... - (baseaddrN) (sizeN)>; - }; - ... -}; - -A memory node follows the typical device tree rules for "reg" property: -n: number of cells used to store base address value -m: number of cells used to store size value -baseaddrX: defines a base address of the defined memory bank -sizeX: the size of the defined memory bank - - -More than one memory bank can be defined. - - -*** Reserved memory regions *** - -In /memory/reserved-memory node one can create child nodes describing -particular reserved (excluded from normal use) memory regions. Such -memory regions are usually designed for the special usage by various -device drivers. A good example are contiguous memory allocations or -memory sharing with other operating system on the same hardware board. -Those special memory regions might depend on the board configuration and -devices used on the target system. - -Parameters for each memory region can be encoded into the device tree -with the following convention: - -[(label):] (name) { - compatible = "linux,contiguous-memory-region", "reserved-memory-region"; - reg = <(address) (size)>; - (linux,default-contiguous-region); -}; - -compatible: one or more of: - - "linux,contiguous-memory-region" - enables binding of this - region to Contiguous Memory Allocator (special region for - contiguous memory allocations, shared with movable system - memory, Linux kernel-specific). - - "reserved-memory-region" - compatibility is defined, given - region is assigned for exclusive usage for by the respective - devices. - -reg: standard property defining the base address and size of - the memory region - -linux,default-contiguous-region: property indicating that the region - is the default region for all contiguous memory - allocations, Linux specific (optional) - -It is optional to specify the base address, so if one wants to use -autoconfiguration of the base address, '0' can be specified as a base -address in the 'reg' property. - -The /memory/reserved-memory node must contain the same #address-cells -and #size-cells value as the root node. - - -*** Device node's properties *** - -Once regions in the /memory/reserved-memory node have been defined, they -may be referenced by other device nodes. Bindings that wish to reference -memory regions should explicitly document their use of the following -property: - -memory-region = <&phandle_to_defined_region>; - -This property indicates that the device driver should use the memory -region pointed by the given phandle. - - -*** Example *** - -This example defines a memory consisting of 4 memory banks. 3 contiguous -regions are defined for Linux kernel, one default of all device drivers -(named contig_mem, placed at 0x72000000, 64MiB), one dedicated to the -framebuffer device (labelled display_mem, placed at 0x78000000, 8MiB) -and one for multimedia processing (labelled multimedia_mem, placed at -0x77000000, 64MiB). 'display_mem' region is then assigned to fb@12300000 -device for DMA memory allocations (Linux kernel drivers will use CMA is -available or dma-exclusive usage otherwise). 'multimedia_mem' is -assigned to scaler@12500000 and codec@12600000 devices for contiguous -memory allocations when CMA driver is enabled. - -The reason for creating a separate region for framebuffer device is to -match the framebuffer base address to the one configured by bootloader, -so once Linux kernel drivers starts no glitches on the displayed boot -logo appears. Scaller and codec drivers should share the memory -allocations. - -/ { - #address-cells = <1>; - #size-cells = <1>; - - /* ... */ - - memory { - reg = <0x40000000 0x10000000 - 0x50000000 0x10000000 - 0x60000000 0x10000000 - 0x70000000 0x10000000>; - - reserved-memory { - #address-cells = <1>; - #size-cells = <1>; - - /* - * global autoconfigured region for contiguous allocations - * (used only with Contiguous Memory Allocator) - */ - contig_region@0 { - compatible = "linux,contiguous-memory-region"; - reg = <0x0 0x4000000>; - linux,default-contiguous-region; - }; - - /* - * special region for framebuffer - */ - display_region: region@78000000 { - compatible = "linux,contiguous-memory-region", "reserved-memory-region"; - reg = <0x78000000 0x800000>; - }; - - /* - * special region for multimedia processing devices - */ - multimedia_region: region@77000000 { - compatible = "linux,contiguous-memory-region"; - reg = <0x77000000 0x4000000>; - }; - }; - }; - - /* ... */ - - fb0: fb@12300000 { - status = "okay"; - memory-region = <&display_region>; - }; - - scaler: scaler@12500000 { - status = "okay"; - memory-region = <&multimedia_region>; - }; - - codec: codec@12600000 { - status = "okay"; - memory-region = <&multimedia_region>; - }; -}; diff --git a/Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt index 6d1c0988cfc7..c67b975c8906 100644 --- a/Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt +++ b/Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt @@ -1,11 +1,11 @@ -* Samsung Exynos specific extensions to the Synopsis Designware Mobile +* Samsung Exynos specific extensions to the Synopsys Designware Mobile Storage Host Controller -The Synopsis designware mobile storage host controller is used to interface +The Synopsys designware mobile storage host controller is used to interface a SoC with storage medium such as eMMC or SD/MMC cards. This file documents -differences between the core Synopsis dw mshc controller properties described -by synopsis-dw-mshc.txt and the properties used by the Samsung Exynos specific -extensions to the Synopsis Designware Mobile Storage Host Controller. +differences between the core Synopsys dw mshc controller properties described +by synopsys-dw-mshc.txt and the properties used by the Samsung Exynos specific +extensions to the Synopsys Designware Mobile Storage Host Controller. Required Properties: diff --git a/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt index 8a3d91d47b6a..c559f3f36309 100644 --- a/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt +++ b/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt @@ -1,11 +1,11 @@ -* Rockchip specific extensions to the Synopsis Designware Mobile +* Rockchip specific extensions to the Synopsys Designware Mobile Storage Host Controller -The Synopsis designware mobile storage host controller is used to interface +The Synopsys designware mobile storage host controller is used to interface a SoC with storage medium such as eMMC or SD/MMC cards. This file documents -differences between the core Synopsis dw mshc controller properties described -by synopsis-dw-mshc.txt and the properties used by the Rockchip specific -extensions to the Synopsis Designware Mobile Storage Host Controller. +differences between the core Synopsys dw mshc controller properties described +by synopsys-dw-mshc.txt and the properties used by the Rockchip specific +extensions to the Synopsys Designware Mobile Storage Host Controller. Required Properties: diff --git a/Documentation/devicetree/bindings/mmc/synopsis-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt index cdcebea9c6f5..066a78b034ca 100644 --- a/Documentation/devicetree/bindings/mmc/synopsis-dw-mshc.txt +++ b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt @@ -1,14 +1,14 @@ -* Synopsis Designware Mobile Storage Host Controller +* Synopsys Designware Mobile Storage Host Controller -The Synopsis designware mobile storage host controller is used to interface +The Synopsys designware mobile storage host controller is used to interface a SoC with storage medium such as eMMC or SD/MMC cards. This file documents differences between the core mmc properties described by mmc.txt and the -properties used by the Synopsis Designware Mobile Storage Host Controller. +properties used by the Synopsys Designware Mobile Storage Host Controller. Required Properties: * compatible: should be - - snps,dw-mshc: for controllers compliant with synopsis dw-mshc. + - snps,dw-mshc: for controllers compliant with synopsys dw-mshc. * #address-cells: should be 1. * #size-cells: should be 0. diff --git a/Documentation/devicetree/bindings/mmc/tmio_mmc.txt b/Documentation/devicetree/bindings/mmc/tmio_mmc.txt index df204e18e030..6a2a1160a70d 100644 --- a/Documentation/devicetree/bindings/mmc/tmio_mmc.txt +++ b/Documentation/devicetree/bindings/mmc/tmio_mmc.txt @@ -9,12 +9,15 @@ compulsory and any optional properties, common to all SD/MMC drivers, as described in mmc.txt, can be used. Additionally the following tmio_mmc-specific optional bindings can be used. +Required properties: +- compatible: "renesas,sdhi-shmobile" - a generic sh-mobile SDHI unit + "renesas,sdhi-sh7372" - SDHI IP on SH7372 SoC + "renesas,sdhi-sh73a0" - SDHI IP on SH73A0 SoC + "renesas,sdhi-r8a73a4" - SDHI IP on R8A73A4 SoC + "renesas,sdhi-r8a7740" - SDHI IP on R8A7740 SoC + "renesas,sdhi-r8a7778" - SDHI IP on R8A7778 SoC + "renesas,sdhi-r8a7779" - SDHI IP on R8A7779 SoC + "renesas,sdhi-r8a7790" - SDHI IP on R8A7790 SoC + Optional properties: - toshiba,mmc-wrprotect-disable: write-protect detection is unavailable - -When used with Renesas SDHI hardware, the following compatibility strings -configure various model-specific properties: - -"renesas,sh7372-sdhi": (default) compatible with SH7372 -"renesas,r8a7740-sdhi": compatible with R8A7740: certain MMC/SD commands have to - wait for the interface to become idle. diff --git a/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt b/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt index 2c6be0377f55..d2ea4605d078 100644 --- a/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt +++ b/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt @@ -86,6 +86,7 @@ General Properties: Clock Properties: + - fsl,cksel Timer reference clock source. - fsl,tclk-period Timer reference clock period in nanoseconds. - fsl,tmr-prsc Prescaler, divides the output clock. - fsl,tmr-add Frequency compensation value. @@ -97,7 +98,7 @@ Clock Properties: clock. You must choose these carefully for the clock to work right. Here is how to figure good values: - TimerOsc = system clock MHz + TimerOsc = selected reference clock MHz tclk_period = desired clock period nanoseconds NominalFreq = 1000 / tclk_period MHz FreqDivRatio = TimerOsc / NominalFreq (must be greater that 1.0) @@ -114,6 +115,20 @@ Clock Properties: Pulse Per Second (PPS) signal, since this will be offered to the PPS subsystem to synchronize the Linux clock. + Reference clock source is determined by the value, which is holded + in CKSEL bits in TMR_CTRL register. "fsl,cksel" property keeps the + value, which will be directly written in those bits, that is why, + according to reference manual, the next clock sources can be used: + + <0> - external high precision timer reference clock (TSEC_TMR_CLK + input is used for this purpose); + <1> - eTSEC system clock; + <2> - eTSEC1 transmit clock; + <3> - RTC clock input. + + When this attribute is not used, eTSEC system clock will serve as + IEEE 1588 timer reference clock. + Example: ptp_clock@24E00 { @@ -121,6 +136,7 @@ Example: reg = <0x24E00 0xB0>; interrupts = <12 0x8 13 0x8>; interrupt-parent = < &ipic >; + fsl,cksel = <1>; fsl,tclk-period = <10>; fsl,tmr-prsc = <100>; fsl,tmr-add = <0x999999A4>; diff --git a/Documentation/devicetree/bindings/pci/designware-pcie.txt b/Documentation/devicetree/bindings/pci/designware-pcie.txt index eabcb4b5db6e..e216af356847 100644 --- a/Documentation/devicetree/bindings/pci/designware-pcie.txt +++ b/Documentation/devicetree/bindings/pci/designware-pcie.txt @@ -1,4 +1,4 @@ -* Synopsis Designware PCIe interface +* Synopsys Designware PCIe interface Required properties: - compatible: should contain "snps,dw-pcie" to identify the diff --git a/Documentation/devicetree/bindings/tty/serial/qca,ar9330-uart.txt b/Documentation/devicetree/bindings/serial/qca,ar9330-uart.txt index c5e032c85bf9..c5e032c85bf9 100644 --- a/Documentation/devicetree/bindings/tty/serial/qca,ar9330-uart.txt +++ b/Documentation/devicetree/bindings/serial/qca,ar9330-uart.txt diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1a036cd972fb..fcbb736d55fe 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -480,6 +480,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Format: <io>,<irq>,<mode> See header of drivers/net/hamradio/baycom_ser_hdx.c. + blkdevparts= Manual partition parsing of block device(s) for + embedded devices based on command line input. + See Documentation/block/cmdline-partition.txt + boot_delay= Milliseconds to delay each printk during boot. Values larger than 10 seconds (10000) are changed to no delay (0). @@ -1357,7 +1361,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. pages. In the event, a node is too small to have both kernelcore and Movable pages, kernelcore pages will take priority and other nodes will have a larger number - of kernelcore pages. The Movable zone is used for the + of Movable pages. The Movable zone is used for the allocation of pages that may be reclaimed or moved by the page migration subsystem. This means that HugeTLB pages may not be allocated from this zone. @@ -3485,6 +3489,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. the unplug protocol never -- do not unplug even if version check succeeds + xen_nopvspin [X86,XEN] + Disables the ticketlock slowpath using Xen PV + optimizations. + xirc2ps_cs= [NET,PCMCIA] Format: <irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]] diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt index a46ddb85e83a..85c362d8ea34 100644 --- a/Documentation/sound/alsa/HD-Audio-Models.txt +++ b/Documentation/sound/alsa/HD-Audio-Models.txt @@ -28,6 +28,7 @@ ALC269/270/275/276/28x/29x alc269-dmic Enable ALC269(VA) digital mic workaround alc271-dmic Enable ALC271X digital mic workaround inv-dmic Inverted internal mic workaround + headset-mic Indicates a combined headset (headphone+mic) jack lenovo-dock Enables docking station I/O for some Lenovos dell-headset-multi Headset jack, which can also be used as mic-in dell-headset-dock Headset jack (without mic-in), and also dock I/O @@ -296,6 +297,12 @@ Cirrus Logic CS4206/4207 imac27 IMac 27 Inch auto BIOS setup (default) +Cirrus Logic CS4208 +=================== + mba6 MacBook Air 6,1 and 6,2 + gpio0 Enable GPIO 0 amp + auto BIOS setup (default) + VIA VT17xx/VT18xx/VT20xx ======================== auto BIOS setup (default) diff --git a/Documentation/virtual/kvm/00-INDEX b/Documentation/virtual/kvm/00-INDEX new file mode 100644 index 000000000000..641ec9220179 --- /dev/null +++ b/Documentation/virtual/kvm/00-INDEX @@ -0,0 +1,24 @@ +00-INDEX + - this file. +api.txt + - KVM userspace API. +cpuid.txt + - KVM-specific cpuid leaves (x86). +devices/ + - KVM_CAP_DEVICE_CTRL userspace API. +hypercalls.txt + - KVM hypercalls. +locking.txt + - notes on KVM locks. +mmu.txt + - the x86 kvm shadow mmu. +msr.txt + - KVM-specific MSRs (x86). +nested-vmx.txt + - notes on nested virtualization for Intel x86 processors. +ppc-pv.txt + - the paravirtualization interface on PowerPC. +review-checklist.txt + - review checklist for KVM patches. +timekeeping.txt + - timekeeping virtualization for x86-based architectures. diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 858aecf21db2..a30035dd4c26 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1122,9 +1122,9 @@ struct kvm_cpuid2 { struct kvm_cpuid_entry2 entries[0]; }; -#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 -#define KVM_CPUID_FLAG_STATEFUL_FUNC 2 -#define KVM_CPUID_FLAG_STATE_READ_NEXT 4 +#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) +#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) +#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) struct kvm_cpuid_entry2 { __u32 function; @@ -1810,6 +1810,50 @@ registers, find a list below: PPC | KVM_REG_PPC_TLB3PS | 32 PPC | KVM_REG_PPC_EPTCFG | 32 PPC | KVM_REG_PPC_ICP_STATE | 64 + PPC | KVM_REG_PPC_TB_OFFSET | 64 + PPC | KVM_REG_PPC_SPMC1 | 32 + PPC | KVM_REG_PPC_SPMC2 | 32 + PPC | KVM_REG_PPC_IAMR | 64 + PPC | KVM_REG_PPC_TFHAR | 64 + PPC | KVM_REG_PPC_TFIAR | 64 + PPC | KVM_REG_PPC_TEXASR | 64 + PPC | KVM_REG_PPC_FSCR | 64 + PPC | KVM_REG_PPC_PSPB | 32 + PPC | KVM_REG_PPC_EBBHR | 64 + PPC | KVM_REG_PPC_EBBRR | 64 + PPC | KVM_REG_PPC_BESCR | 64 + PPC | KVM_REG_PPC_TAR | 64 + PPC | KVM_REG_PPC_DPDES | 64 + PPC | KVM_REG_PPC_DAWR | 64 + PPC | KVM_REG_PPC_DAWRX | 64 + PPC | KVM_REG_PPC_CIABR | 64 + PPC | KVM_REG_PPC_IC | 64 + PPC | KVM_REG_PPC_VTB | 64 + PPC | KVM_REG_PPC_CSIGR | 64 + PPC | KVM_REG_PPC_TACR | 64 + PPC | KVM_REG_PPC_TCSCR | 64 + PPC | KVM_REG_PPC_PID | 64 + PPC | KVM_REG_PPC_ACOP | 64 + PPC | KVM_REG_PPC_VRSAVE | 32 + PPC | KVM_REG_PPC_LPCR | 64 + PPC | KVM_REG_PPC_PPR | 64 + PPC | KVM_REG_PPC_ARCH_COMPAT 32 + PPC | KVM_REG_PPC_TM_GPR0 | 64 + ... + PPC | KVM_REG_PPC_TM_GPR31 | 64 + PPC | KVM_REG_PPC_TM_VSR0 | 128 + ... + PPC | KVM_REG_PPC_TM_VSR63 | 128 + PPC | KVM_REG_PPC_TM_CR | 64 + PPC | KVM_REG_PPC_TM_LR | 64 + PPC | KVM_REG_PPC_TM_CTR | 64 + PPC | KVM_REG_PPC_TM_FPSCR | 64 + PPC | KVM_REG_PPC_TM_AMR | 64 + PPC | KVM_REG_PPC_TM_PPR | 64 + PPC | KVM_REG_PPC_TM_VRSAVE | 64 + PPC | KVM_REG_PPC_TM_VSCR | 32 + PPC | KVM_REG_PPC_TM_DSCR | 64 + PPC | KVM_REG_PPC_TM_TAR | 64 ARM registers are mapped using the lower 32 bits. The upper 16 of that is the register group type, or coprocessor number: @@ -2304,7 +2348,31 @@ Possible features: Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only). -4.83 KVM_GET_REG_LIST +4.83 KVM_ARM_PREFERRED_TARGET + +Capability: basic +Architectures: arm, arm64 +Type: vm ioctl +Parameters: struct struct kvm_vcpu_init (out) +Returns: 0 on success; -1 on error +Errors: + ENODEV: no preferred target available for the host + +This queries KVM for preferred CPU target type which can be emulated +by KVM on underlying host. + +The ioctl returns struct kvm_vcpu_init instance containing information +about preferred CPU target type and recommended features for it. The +kvm_vcpu_init->features bitmap returned will have feature bits set if +the preferred target recommends setting these features, but this is +not mandatory. + +The information returned by this ioctl can be used to prepare an instance +of struct kvm_vcpu_init for KVM_ARM_VCPU_INIT ioctl which will result in +in VCPU matching underlying host. + + +4.84 KVM_GET_REG_LIST Capability: basic Architectures: arm, arm64 @@ -2323,8 +2391,7 @@ struct kvm_reg_list { This ioctl returns the guest registers that are supported for the KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. - -4.84 KVM_ARM_SET_DEVICE_ADDR +4.85 KVM_ARM_SET_DEVICE_ADDR Capability: KVM_CAP_ARM_SET_DEVICE_ADDR Architectures: arm, arm64 @@ -2362,7 +2429,7 @@ must be called after calling KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the base addresses will return -EEXIST. -4.85 KVM_PPC_RTAS_DEFINE_TOKEN +4.86 KVM_PPC_RTAS_DEFINE_TOKEN Capability: KVM_CAP_PPC_RTAS Architectures: ppc @@ -2661,6 +2728,77 @@ and usually define the validity of a groups of registers. (e.g. one bit }; +4.81 KVM_GET_EMULATED_CPUID + +Capability: KVM_CAP_EXT_EMUL_CPUID +Architectures: x86 +Type: system ioctl +Parameters: struct kvm_cpuid2 (in/out) +Returns: 0 on success, -1 on error + +struct kvm_cpuid2 { + __u32 nent; + __u32 flags; + struct kvm_cpuid_entry2 entries[0]; +}; + +The member 'flags' is used for passing flags from userspace. + +#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) +#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) +#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) + +struct kvm_cpuid_entry2 { + __u32 function; + __u32 index; + __u32 flags; + __u32 eax; + __u32 ebx; + __u32 ecx; + __u32 edx; + __u32 padding[3]; +}; + +This ioctl returns x86 cpuid features which are emulated by +kvm.Userspace can use the information returned by this ioctl to query +which features are emulated by kvm instead of being present natively. + +Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2 +structure with the 'nent' field indicating the number of entries in +the variable-size array 'entries'. If the number of entries is too low +to describe the cpu capabilities, an error (E2BIG) is returned. If the +number is too high, the 'nent' field is adjusted and an error (ENOMEM) +is returned. If the number is just right, the 'nent' field is adjusted +to the number of valid entries in the 'entries' array, which is then +filled. + +The entries returned are the set CPUID bits of the respective features +which kvm emulates, as returned by the CPUID instruction, with unknown +or unsupported feature bits cleared. + +Features like x2apic, for example, may not be present in the host cpu +but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be +emulated efficiently and thus not included here. + +The fields in each entry are defined as follows: + + function: the eax value used to obtain the entry + index: the ecx value used to obtain the entry (for entries that are + affected by ecx) + flags: an OR of zero or more of the following: + KVM_CPUID_FLAG_SIGNIFCANT_INDEX: + if the index field is valid + KVM_CPUID_FLAG_STATEFUL_FUNC: + if cpuid for this function returns different values for successive + invocations; there will be several entries with the same function, + all with this flag set + KVM_CPUID_FLAG_STATE_READ_NEXT: + for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is + the first entry to be read by a cpu + eax, ebx, ecx, edx: the values returned by the cpuid instruction for + this function/index combination + + 6. Capabilities that can be enabled ----------------------------------- diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt index 22ff659bc0fb..3c65feb83010 100644 --- a/Documentation/virtual/kvm/cpuid.txt +++ b/Documentation/virtual/kvm/cpuid.txt @@ -43,6 +43,13 @@ KVM_FEATURE_CLOCKSOURCE2 || 3 || kvmclock available at msrs KVM_FEATURE_ASYNC_PF || 4 || async pf can be enabled by || || writing to msr 0x4b564d02 ------------------------------------------------------------------------------ +KVM_FEATURE_STEAL_TIME || 5 || steal time can be enabled by + || || writing to msr 0x4b564d03. +------------------------------------------------------------------------------ +KVM_FEATURE_PV_EOI || 6 || paravirtualized end of interrupt + || || handler can be enabled by writing + || || to msr 0x4b564d04. +------------------------------------------------------------------------------ KVM_FEATURE_PV_UNHALT || 7 || guest checks this feature bit || || before enabling paravirtualized || || spinlock support. diff --git a/Documentation/virtual/kvm/devices/vfio.txt b/Documentation/virtual/kvm/devices/vfio.txt new file mode 100644 index 000000000000..ef51740c67ca --- /dev/null +++ b/Documentation/virtual/kvm/devices/vfio.txt @@ -0,0 +1,22 @@ +VFIO virtual device +=================== + +Device types supported: + KVM_DEV_TYPE_VFIO + +Only one VFIO instance may be created per VM. The created device +tracks VFIO groups in use by the VM and features of those groups +important to the correctness and acceleration of the VM. As groups +are enabled and disabled for use by the VM, KVM should be updated +about their presence. When registered with KVM, a reference to the +VFIO-group is held by KVM. + +Groups: + KVM_DEV_VFIO_GROUP + +KVM_DEV_VFIO_GROUP attributes: + KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking + KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking + +For each, kvm_device_attr.addr points to an int32_t file descriptor +for the VFIO group. diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt index 41b7ac9884b5..f8869410d40c 100644 --- a/Documentation/virtual/kvm/locking.txt +++ b/Documentation/virtual/kvm/locking.txt @@ -132,10 +132,14 @@ See the comments in spte_has_volatile_bits() and mmu_spte_update(). ------------ Name: kvm_lock -Type: raw_spinlock +Type: spinlock_t Arch: any Protects: - vm_list - - hardware virtualization enable/disable + +Name: kvm_count_lock +Type: raw_spinlock_t +Arch: any +Protects: - hardware virtualization enable/disable Comment: 'raw' because hardware enabling/disabling must be atomic /wrt migration. @@ -151,3 +155,14 @@ Type: spinlock_t Arch: any Protects: -shadow page/shadow tlb entry Comment: it is a spinlock since it is used in mmu notifier. + +Name: kvm->srcu +Type: srcu lock +Arch: any +Protects: - kvm->memslots + - kvm->buses +Comment: The srcu read lock must be held while accessing memslots (e.g. + when using gfn_to_* functions) and while accessing in-kernel + MMIO/PIO address->device structure mapping (kvm->buses). + The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu + if it is needed by multiple functions. diff --git a/MAINTAINERS b/MAINTAINERS index e61c2e83fc2b..366f90cc6fdb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -237,11 +237,11 @@ F: drivers/platform/x86/acer-wmi.c ACPI M: Len Brown <lenb@kernel.org> -M: Rafael J. Wysocki <rjw@sisk.pl> +M: Rafael J. Wysocki <rjw@rjwysocki.net> L: linux-acpi@vger.kernel.org -W: http://www.lesswatts.org/projects/acpi/ -Q: http://patchwork.kernel.org/project/linux-acpi/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux +W: https://01.org/linux-acpi +Q: https://patchwork.kernel.org/project/linux-acpi/list/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm S: Supported F: drivers/acpi/ F: drivers/pnp/pnpacpi/ @@ -256,21 +256,21 @@ F: drivers/pci/*/*/*acpi* ACPI FAN DRIVER M: Zhang Rui <rui.zhang@intel.com> L: linux-acpi@vger.kernel.org -W: http://www.lesswatts.org/projects/acpi/ +W: https://01.org/linux-acpi S: Supported F: drivers/acpi/fan.c ACPI THERMAL DRIVER M: Zhang Rui <rui.zhang@intel.com> L: linux-acpi@vger.kernel.org -W: http://www.lesswatts.org/projects/acpi/ +W: https://01.org/linux-acpi S: Supported F: drivers/acpi/*thermal* ACPI VIDEO DRIVER M: Zhang Rui <rui.zhang@intel.com> L: linux-acpi@vger.kernel.org -W: http://www.lesswatts.org/projects/acpi/ +W: https://01.org/linux-acpi S: Supported F: drivers/acpi/video.c @@ -824,15 +824,21 @@ S: Maintained F: arch/arm/mach-gemini/ ARM/CSR SIRFPRIMA2 MACHINE SUPPORT -M: Barry Song <baohua.song@csr.com> +M: Barry Song <baohua@kernel.org> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) T: git git://git.kernel.org/pub/scm/linux/kernel/git/baohua/linux.git S: Maintained F: arch/arm/mach-prima2/ +F: drivers/clk/clk-prima2.c +F: drivers/clocksource/timer-prima2.c +F: drivers/clocksource/timer-marco.c F: drivers/dma/sirf-dma.c F: drivers/i2c/busses/i2c-sirf.c +F: drivers/input/misc/sirfsoc-onkey.c +F: drivers/irqchip/irq-sirfsoc.c F: drivers/mmc/host/sdhci-sirf.c F: drivers/pinctrl/sirf/ +F: drivers/rtc/rtc-sirfsoc.c F: drivers/spi/spi-sirf.c ARM/EBSA110 MACHINE SUPPORT @@ -1812,7 +1818,8 @@ S: Supported F: drivers/net/ethernet/broadcom/bnx2x/ BROADCOM BCM281XX/BCM11XXX ARM ARCHITECTURE -M: Christian Daudt <csd@broadcom.com> +M: Christian Daudt <bcm@fixthebug.org> +L: bcm-kernel-feedback-list@broadcom.com T: git git://git.github.com/broadcom/bcm11351 S: Maintained F: arch/arm/mach-bcm/ @@ -2293,7 +2300,7 @@ S: Maintained F: drivers/net/ethernet/ti/cpmac.c CPU FREQUENCY DRIVERS -M: Rafael J. Wysocki <rjw@sisk.pl> +M: Rafael J. Wysocki <rjw@rjwysocki.net> M: Viresh Kumar <viresh.kumar@linaro.org> L: cpufreq@vger.kernel.org L: linux-pm@vger.kernel.org @@ -2324,7 +2331,7 @@ S: Maintained F: drivers/cpuidle/cpuidle-big_little.c CPUIDLE DRIVERS -M: Rafael J. Wysocki <rjw@sisk.pl> +M: Rafael J. Wysocki <rjw@rjwysocki.net> M: Daniel Lezcano <daniel.lezcano@linaro.org> L: linux-pm@vger.kernel.org S: Maintained @@ -2639,6 +2646,18 @@ F: include/linux/device-mapper.h F: include/linux/dm-*.h F: include/uapi/linux/dm-*.h +DIGI NEO AND CLASSIC PCI PRODUCTS +M: Lidza Louina <lidza.louina@gmail.com> +L: driverdev-devel@linuxdriverproject.org +S: Maintained +F: drivers/staging/dgnc/ + +DIGI EPCA PCI PRODUCTS +M: Lidza Louina <lidza.louina@gmail.com> +L: driverdev-devel@linuxdriverproject.org +S: Maintained +F: drivers/staging/dgap/ + DIOLAN U2C-12 I2C DRIVER M: Guenter Roeck <linux@roeck-us.net> L: linux-i2c@vger.kernel.org @@ -3534,7 +3553,7 @@ F: fs/freevxfs/ FREEZER M: Pavel Machek <pavel@ucw.cz> -M: "Rafael J. Wysocki" <rjw@sisk.pl> +M: "Rafael J. Wysocki" <rjw@rjwysocki.net> L: linux-pm@vger.kernel.org S: Supported F: Documentation/power/freezing-of-tasks.txt @@ -3870,7 +3889,7 @@ F: drivers/video/hgafb.c HIBERNATION (aka Software Suspend, aka swsusp) M: Pavel Machek <pavel@ucw.cz> -M: "Rafael J. Wysocki" <rjw@sisk.pl> +M: "Rafael J. Wysocki" <rjw@rjwysocki.net> L: linux-pm@vger.kernel.org S: Supported F: arch/x86/power/ @@ -4320,7 +4339,7 @@ F: drivers/video/i810/ INTEL MENLOW THERMAL DRIVER M: Sujith Thomas <sujith.thomas@intel.com> L: platform-driver-x86@vger.kernel.org -W: http://www.lesswatts.org/projects/acpi/ +W: https://01.org/linux-acpi S: Supported F: drivers/platform/x86/intel_menlow.c @@ -4457,6 +4476,13 @@ L: linux-serial@vger.kernel.org S: Maintained F: drivers/tty/serial/ioc3_serial.c +IOMMU DRIVERS +M: Joerg Roedel <joro@8bytes.org> +L: iommu@lists.linux-foundation.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git +S: Maintained +F: drivers/iommu/ + IP MASQUERADING M: Juanjo Ciarlante <jjciarla@raiz.uncu.edu.ar> S: Maintained @@ -4788,7 +4814,8 @@ KERNEL VIRTUAL MACHINE (KVM) M: Gleb Natapov <gleb@redhat.com> M: Paolo Bonzini <pbonzini@redhat.com> L: kvm@vger.kernel.org -W: http://linux-kvm.org +W: http://www.linux-kvm.org +T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git S: Supported F: Documentation/*/kvm*.txt F: Documentation/virtual/kvm/ @@ -6595,7 +6622,7 @@ S: Obsolete F: drivers/net/wireless/prism54/ PROMISE SATA TX2/TX4 CONTROLLER LIBATA DRIVER -M: Mikael Pettersson <mikpe@it.uu.se> +M: Mikael Pettersson <mikpelinux@gmail.com> L: linux-ide@vger.kernel.org S: Maintained F: drivers/ata/sata_promise.* @@ -7258,9 +7285,9 @@ F: include/linux/sched.h F: include/uapi/linux/sched.h SCORE ARCHITECTURE -M: Chen Liqin <liqin.chen@sunplusct.com> +M: Chen Liqin <liqin.linux@gmail.com> M: Lennox Wu <lennox.wu@gmail.com> -W: http://www.sunplusct.com +W: http://www.sunplus.com S: Supported F: arch/score/ @@ -8069,7 +8096,7 @@ F: drivers/sh/ SUSPEND TO RAM M: Len Brown <len.brown@intel.com> M: Pavel Machek <pavel@ucw.cz> -M: "Rafael J. Wysocki" <rjw@sisk.pl> +M: "Rafael J. Wysocki" <rjw@rjwysocki.net> L: linux-pm@vger.kernel.org S: Supported F: Documentation/power/ @@ -8724,9 +8751,8 @@ F: Documentation/hid/hiddev.txt F: drivers/hid/usbhid/ USB/IP DRIVERS -M: Matt Mooney <mfm@muteddisk.com> L: linux-usb@vger.kernel.org -S: Maintained +S: Orphan F: drivers/staging/usbip/ USB ISP116X DRIVER @@ -9366,6 +9392,7 @@ F: arch/arm64/include/asm/xen/ XEN NETWORK BACKEND DRIVER M: Ian Campbell <ian.campbell@citrix.com> +M: Wei Liu <wei.liu2@citrix.com> L: xen-devel@lists.xenproject.org (moderated for non-subscribers) L: netdev@vger.kernel.org S: Supported @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc5 NAME = One Giant Leap for Frogkind # *DOCUMENTATION* diff --git a/arch/Kconfig b/arch/Kconfig index 1feb169274fe..af2cc6eabcc7 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -286,9 +286,6 @@ config HAVE_PERF_USER_STACK_DUMP config HAVE_ARCH_JUMP_LABEL bool -config HAVE_ARCH_MUTEX_CPU_RELAX - bool - config HAVE_RCU_TABLE_FREE bool diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index f158197ac5b0..b6a8c2dfbe6e 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -45,7 +45,14 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) static inline void arch_spin_unlock(arch_spinlock_t *lock) { - lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__; + unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__; + + __asm__ __volatile__( + " ex %0, [%1] \n" + : "+r" (tmp) + : "r"(&(lock->slock)) + : "memory"); + smp_mb(); } diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index 32420824375b..30c9baffa96f 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -43,7 +43,7 @@ * Because it essentially checks if buffer end is within limit and @len is * non-ngeative, which implies that buffer start will be within limit too. * - * The reason for rewriting being, for majorit yof cases, @len is generally + * The reason for rewriting being, for majority of cases, @len is generally * compile time constant, causing first sub-expression to be compile time * subsumed. * @@ -53,7 +53,7 @@ * */ #define __user_ok(addr, sz) (((sz) <= TASK_SIZE) && \ - (((addr)+(sz)) <= get_fs())) + ((addr) <= (get_fs() - (sz)))) #define __access_ok(addr, sz) (unlikely(__kernel_ok) || \ likely(__user_ok((addr), (sz)))) diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c index 333238564b67..5d76706139dd 100644 --- a/arch/arc/kernel/ptrace.c +++ b/arch/arc/kernel/ptrace.c @@ -102,7 +102,7 @@ static int genregs_set(struct task_struct *target, REG_IGNORE_ONE(pad2); REG_IN_CHUNK(callee, efa, cregs); /* callee_regs[r25..r13] */ REG_IGNORE_ONE(efa); /* efa update invalid */ - REG_IN_ONE(stop_pc, &ptregs->ret); /* stop_pc: PC update */ + REG_IGNORE_ONE(stop_pc); /* PC updated via @ret */ return ret; } diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index ee6ef2f60a28..7e95e1a86510 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -101,7 +101,6 @@ SYSCALL_DEFINE0(rt_sigreturn) { struct rt_sigframe __user *sf; unsigned int magic; - int err; struct pt_regs *regs = current_pt_regs(); /* Always make any pending restarted system calls return -EINTR */ @@ -119,15 +118,16 @@ SYSCALL_DEFINE0(rt_sigreturn) if (!access_ok(VERIFY_READ, sf, sizeof(*sf))) goto badframe; - err = restore_usr_regs(regs, sf); - err |= __get_user(magic, &sf->sigret_magic); - if (err) + if (__get_user(magic, &sf->sigret_magic)) goto badframe; if (unlikely(is_do_ss_needed(magic))) if (restore_altstack(&sf->uc.uc_stack)) goto badframe; + if (restore_usr_regs(regs, sf)) + goto badframe; + /* Don't restart from sigreturn */ syscall_wont_restart(regs); @@ -191,6 +191,15 @@ setup_rt_frame(int signo, struct k_sigaction *ka, siginfo_t *info, return 1; /* + * w/o SA_SIGINFO, struct ucontext is partially populated (only + * uc_mcontext/uc_sigmask) for kernel's normal user state preservation + * during signal handler execution. This works for SA_SIGINFO as well + * although the semantics are now overloaded (the same reg state can be + * inspected by userland: but are they allowed to fiddle with it ? + */ + err |= stash_usr_regs(sf, regs, set); + + /* * SA_SIGINFO requires 3 args to signal handler: * #1: sig-no (common to any handler) * #2: struct siginfo @@ -213,14 +222,6 @@ setup_rt_frame(int signo, struct k_sigaction *ka, siginfo_t *info, magic = MAGIC_SIGALTSTK; } - /* - * w/o SA_SIGINFO, struct ucontext is partially populated (only - * uc_mcontext/uc_sigmask) for kernel's normal user state preservation - * during signal handler execution. This works for SA_SIGINFO as well - * although the semantics are now overloaded (the same reg state can be - * inspected by userland: but are they allowed to fiddle with it ? - */ - err |= stash_usr_regs(sf, regs, set); err |= __put_user(magic, &sf->sigret_magic); if (err) return err; diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c index 0e51e69cf30d..3fde7de3ea67 100644 --- a/arch/arc/kernel/time.c +++ b/arch/arc/kernel/time.c @@ -227,12 +227,9 @@ void __attribute__((weak)) arc_local_timer_setup(unsigned int cpu) { struct clock_event_device *clk = &per_cpu(arc_clockevent_device, cpu); - clockevents_calc_mult_shift(clk, arc_get_core_freq(), 5); - - clk->max_delta_ns = clockevent_delta2ns(ARC_TIMER_MAX, clk); clk->cpumask = cpumask_of(cpu); - - clockevents_register_device(clk); + clockevents_config_and_register(clk, arc_get_core_freq(), + 0, ARC_TIMER_MAX); /* * setup the per-cpu timer IRQ handler - for all cpus diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c index 28d170060747..7ff5b5c183bb 100644 --- a/arch/arc/kernel/unaligned.c +++ b/arch/arc/kernel/unaligned.c @@ -245,6 +245,12 @@ int misaligned_fixup(unsigned long address, struct pt_regs *regs, regs->status32 &= ~STATUS_DE_MASK; } else { regs->ret += state.instr_len; + + /* handle zero-overhead-loop */ + if ((regs->ret == regs->lp_end) && (regs->lp_count)) { + regs->ret = regs->lp_start; + regs->lp_count--; + } } return 0; diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 3f7714d8d2d2..1ad6fb6c094d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2217,8 +2217,7 @@ config NEON config KERNEL_MODE_NEON bool "Support for NEON in kernel mode" - default n - depends on NEON + depends on NEON && AEABI help Say Y to include support for NEON in kernel mode. diff --git a/arch/arm/Makefile b/arch/arm/Makefile index a37a50f575a2..db50b626be98 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -296,10 +296,15 @@ archprepare: # Convert bzImage to zImage bzImage: zImage -zImage Image xipImage bootpImage uImage: vmlinux +BOOT_TARGETS = zImage Image xipImage bootpImage uImage +INSTALL_TARGETS = zinstall uinstall install + +PHONY += bzImage $(BOOT_TARGETS) $(INSTALL_TARGETS) + +$(BOOT_TARGETS): vmlinux $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@ -zinstall uinstall install: vmlinux +$(INSTALL_TARGETS): $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@ %.dtb: | scripts diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile index 84aa2caf07ed..ec2f8065f955 100644 --- a/arch/arm/boot/Makefile +++ b/arch/arm/boot/Makefile @@ -95,24 +95,24 @@ initrd: @test "$(INITRD)" != "" || \ (echo You must specify INITRD; exit -1) -install: $(obj)/Image - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ +install: + $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \ $(obj)/Image System.map "$(INSTALL_PATH)" -zinstall: $(obj)/zImage - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ +zinstall: + $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \ $(obj)/zImage System.map "$(INSTALL_PATH)" -uinstall: $(obj)/uImage - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ +uinstall: + $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \ $(obj)/uImage System.map "$(INSTALL_PATH)" zi: - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ + $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \ $(obj)/zImage System.map "$(INSTALL_PATH)" i: - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ + $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \ $(obj)/Image System.map "$(INSTALL_PATH)" subdir- := bootp compressed dts diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index e95af3f5433b..802720e3e8fd 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -41,6 +41,8 @@ dtb-$(CONFIG_ARCH_AT91) += sama5d33ek.dtb dtb-$(CONFIG_ARCH_AT91) += sama5d34ek.dtb dtb-$(CONFIG_ARCH_AT91) += sama5d35ek.dtb +dtb-$(CONFIG_ARCH_ATLAS6) += atlas6-evb.dtb + dtb-$(CONFIG_ARCH_BCM2835) += bcm2835-rpi-b.dtb dtb-$(CONFIG_ARCH_BCM) += bcm11351-brt.dtb \ bcm28155-ap.dtb diff --git a/arch/arm/boot/dts/armada-370-netgear-rn102.dts b/arch/arm/boot/dts/armada-370-netgear-rn102.dts index 05e4485a8225..8ac2ac1f69cc 100644 --- a/arch/arm/boot/dts/armada-370-netgear-rn102.dts +++ b/arch/arm/boot/dts/armada-370-netgear-rn102.dts @@ -27,6 +27,25 @@ }; soc { + ranges = <MBUS_ID(0xf0, 0x01) 0 0xd0000000 0x100000 + MBUS_ID(0x01, 0xe0) 0 0xfff00000 0x100000>; + + pcie-controller { + status = "okay"; + + /* Connected to Marvell SATA controller */ + pcie@1,0 { + /* Port 0, Lane 0 */ + status = "okay"; + }; + + /* Connected to FL1009 USB 3.0 controller */ + pcie@2,0 { + /* Port 1, Lane 0 */ + status = "okay"; + }; + }; + internal-regs { serial@12000 { clock-frequency = <200000000>; @@ -57,6 +76,11 @@ marvell,pins = "mpp56"; marvell,function = "gpio"; }; + + poweroff: poweroff { + marvell,pins = "mpp8"; + marvell,function = "gpio"; + }; }; mdio { @@ -89,22 +113,6 @@ pwm_polarity = <0>; }; }; - - pcie-controller { - status = "okay"; - - /* Connected to Marvell SATA controller */ - pcie@1,0 { - /* Port 0, Lane 0 */ - status = "okay"; - }; - - /* Connected to FL1009 USB 3.0 controller */ - pcie@2,0 { - /* Port 1, Lane 0 */ - status = "okay"; - }; - }; }; }; @@ -160,7 +168,7 @@ button@1 { label = "Power Button"; linux,code = <116>; /* KEY_POWER */ - gpios = <&gpio1 30 1>; + gpios = <&gpio1 30 0>; }; button@2 { @@ -176,4 +184,11 @@ }; }; + gpio_poweroff { + compatible = "gpio-poweroff"; + pinctrl-0 = <&poweroff>; + pinctrl-names = "default"; + gpios = <&gpio0 8 1>; + }; + }; diff --git a/arch/arm/boot/dts/armada-xp.dtsi b/arch/arm/boot/dts/armada-xp.dtsi index def125c0eeaa..3058522f5aad 100644 --- a/arch/arm/boot/dts/armada-xp.dtsi +++ b/arch/arm/boot/dts/armada-xp.dtsi @@ -70,6 +70,8 @@ timer@20300 { compatible = "marvell,armada-xp-timer"; + clocks = <&coreclk 2>, <&refclk>; + clock-names = "nbclk", "fixed"; }; coreclk: mvebu-sar@18230 { @@ -169,4 +171,13 @@ }; }; }; + + clocks { + /* 25 MHz reference crystal */ + refclk: oscillator { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <25000000>; + }; + }; }; diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index cf78ac0b04b1..e74dc15efa9d 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -190,12 +190,12 @@ AT91_PIOA 8 AT91_PERIPH_A AT91_PINCTRL_NONE>; /* PA8 periph A */ }; - pinctrl_uart2_rts: uart2_rts-0 { + pinctrl_usart2_rts: usart2_rts-0 { atmel,pins = <AT91_PIOB 0 AT91_PERIPH_B AT91_PINCTRL_NONE>; /* PB0 periph B */ }; - pinctrl_uart2_cts: uart2_cts-0 { + pinctrl_usart2_cts: usart2_cts-0 { atmel,pins = <AT91_PIOB 1 AT91_PERIPH_B AT91_PINCTRL_NONE>; /* PB1 periph B */ }; @@ -556,6 +556,7 @@ interrupts = <12 IRQ_TYPE_LEVEL_HIGH 0>; dmas = <&dma0 1 AT91_DMA_CFG_PER_ID(0)>; dma-names = "rxtx"; + pinctrl-names = "default"; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -567,6 +568,7 @@ interrupts = <26 IRQ_TYPE_LEVEL_HIGH 0>; dmas = <&dma1 1 AT91_DMA_CFG_PER_ID(0)>; dma-names = "rxtx"; + pinctrl-names = "default"; #address-cells = <1>; #size-cells = <0>; status = "disabled"; diff --git a/arch/arm/boot/dts/atlas6.dtsi b/arch/arm/boot/dts/atlas6.dtsi index 8678e0c11119..6db4f81d4795 100644 --- a/arch/arm/boot/dts/atlas6.dtsi +++ b/arch/arm/boot/dts/atlas6.dtsi @@ -181,6 +181,8 @@ interrupts = <17>; fifosize = <128>; clocks = <&clks 13>; + sirf,uart-dma-rx-channel = <21>; + sirf,uart-dma-tx-channel = <2>; }; uart1: uart@b0060000 { @@ -199,6 +201,8 @@ interrupts = <19>; fifosize = <128>; clocks = <&clks 15>; + sirf,uart-dma-rx-channel = <6>; + sirf,uart-dma-tx-channel = <7>; }; usp0: usp@b0080000 { @@ -206,7 +210,10 @@ compatible = "sirf,prima2-usp"; reg = <0xb0080000 0x10000>; interrupts = <20>; + fifosize = <128>; clocks = <&clks 28>; + sirf,usp-dma-rx-channel = <17>; + sirf,usp-dma-tx-channel = <18>; }; usp1: usp@b0090000 { @@ -214,7 +221,10 @@ compatible = "sirf,prima2-usp"; reg = <0xb0090000 0x10000>; interrupts = <21>; + fifosize = <128>; clocks = <&clks 29>; + sirf,usp-dma-rx-channel = <14>; + sirf,usp-dma-tx-channel = <15>; }; dmac0: dma-controller@b00b0000 { @@ -237,6 +247,8 @@ compatible = "sirf,prima2-vip"; reg = <0xb00C0000 0x10000>; clocks = <&clks 31>; + interrupts = <14>; + sirf,vip-dma-rx-channel = <16>; }; spi0: spi@b00d0000 { diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi index 7d7cc777ff7b..bbac42a78ce5 100644 --- a/arch/arm/boot/dts/exynos5250.dtsi +++ b/arch/arm/boot/dts/exynos5250.dtsi @@ -96,6 +96,11 @@ <1 14 0xf08>, <1 11 0xf08>, <1 10 0xf08>; + /* Unfortunately we need this since some versions of U-Boot + * on Exynos don't set the CNTFRQ register, so we need the + * value from DT. + */ + clock-frequency = <24000000>; }; mct@101C0000 { diff --git a/arch/arm/boot/dts/kirkwood.dtsi b/arch/arm/boot/dts/kirkwood.dtsi index cf7aeaf89e9c..1335b2e1bed4 100644 --- a/arch/arm/boot/dts/kirkwood.dtsi +++ b/arch/arm/boot/dts/kirkwood.dtsi @@ -13,6 +13,7 @@ cpu@0 { device_type = "cpu"; compatible = "marvell,feroceon"; + reg = <0>; clocks = <&core_clk 1>, <&core_clk 3>, <&gate_clk 11>; clock-names = "cpu_clk", "ddrclk", "powersave"; }; @@ -167,7 +168,7 @@ xor@60900 { compatible = "marvell,orion-xor"; reg = <0x60900 0x100 - 0xd0B00 0x100>; + 0x60B00 0x100>; status = "okay"; clocks = <&gate_clk 16>; diff --git a/arch/arm/boot/dts/omap3-beagle-xm.dts b/arch/arm/boot/dts/omap3-beagle-xm.dts index 0c514dc8460c..2816bf612672 100644 --- a/arch/arm/boot/dts/omap3-beagle-xm.dts +++ b/arch/arm/boot/dts/omap3-beagle-xm.dts @@ -11,7 +11,7 @@ / { model = "TI OMAP3 BeagleBoard xM"; - compatible = "ti,omap3-beagle-xm", "ti,omap3-beagle", "ti,omap3"; + compatible = "ti,omap3-beagle-xm", "ti,omap36xx", "ti,omap3"; cpus { cpu@0 { diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi index 7d95cda1fae4..b41bd57f4328 100644 --- a/arch/arm/boot/dts/omap3.dtsi +++ b/arch/arm/boot/dts/omap3.dtsi @@ -108,7 +108,7 @@ #address-cells = <1>; #size-cells = <0>; pinctrl-single,register-width = <16>; - pinctrl-single,function-mask = <0x7f1f>; + pinctrl-single,function-mask = <0xff1f>; }; omap3_pmx_wkup: pinmux@0x48002a00 { @@ -117,7 +117,7 @@ #address-cells = <1>; #size-cells = <0>; pinctrl-single,register-width = <16>; - pinctrl-single,function-mask = <0x7f1f>; + pinctrl-single,function-mask = <0xff1f>; }; gpio1: gpio@48310000 { diff --git a/arch/arm/boot/dts/prima2.dtsi b/arch/arm/boot/dts/prima2.dtsi index bbeb623fc2c6..27ed9f5144bc 100644 --- a/arch/arm/boot/dts/prima2.dtsi +++ b/arch/arm/boot/dts/prima2.dtsi @@ -171,7 +171,8 @@ compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; - ranges = <0xb0000000 0xb0000000 0x180000>; + ranges = <0xb0000000 0xb0000000 0x180000>, + <0x56000000 0x56000000 0x1b00000>; timer@b0020000 { compatible = "sirf,prima2-tick"; @@ -196,25 +197,32 @@ uart0: uart@b0050000 { cell-index = <0>; compatible = "sirf,prima2-uart"; - reg = <0xb0050000 0x10000>; + reg = <0xb0050000 0x1000>; interrupts = <17>; + fifosize = <128>; clocks = <&clks 13>; + sirf,uart-dma-rx-channel = <21>; + sirf,uart-dma-tx-channel = <2>; }; uart1: uart@b0060000 { cell-index = <1>; compatible = "sirf,prima2-uart"; - reg = <0xb0060000 0x10000>; + reg = <0xb0060000 0x1000>; interrupts = <18>; + fifosize = <32>; clocks = <&clks 14>; }; uart2: uart@b0070000 { cell-index = <2>; compatible = "sirf,prima2-uart"; - reg = <0xb0070000 0x10000>; + reg = <0xb0070000 0x1000>; interrupts = <19>; + fifosize = <128>; clocks = <&clks 15>; + sirf,uart-dma-rx-channel = <6>; + sirf,uart-dma-tx-channel = <7>; }; usp0: usp@b0080000 { @@ -222,7 +230,10 @@ compatible = "sirf,prima2-usp"; reg = <0xb0080000 0x10000>; interrupts = <20>; + fifosize = <128>; clocks = <&clks 28>; + sirf,usp-dma-rx-channel = <17>; + sirf,usp-dma-tx-channel = <18>; }; usp1: usp@b0090000 { @@ -230,7 +241,10 @@ compatible = "sirf,prima2-usp"; reg = <0xb0090000 0x10000>; interrupts = <21>; + fifosize = <128>; clocks = <&clks 29>; + sirf,usp-dma-rx-channel = <14>; + sirf,usp-dma-tx-channel = <15>; }; usp2: usp@b00a0000 { @@ -238,7 +252,10 @@ compatible = "sirf,prima2-usp"; reg = <0xb00a0000 0x10000>; interrupts = <22>; + fifosize = <128>; clocks = <&clks 30>; + sirf,usp-dma-rx-channel = <10>; + sirf,usp-dma-tx-channel = <11>; }; dmac0: dma-controller@b00b0000 { @@ -261,6 +278,8 @@ compatible = "sirf,prima2-vip"; reg = <0xb00C0000 0x10000>; clocks = <&clks 31>; + interrupts = <14>; + sirf,vip-dma-rx-channel = <16>; }; spi0: spi@b00d0000 { diff --git a/arch/arm/boot/dts/r8a73a4.dtsi b/arch/arm/boot/dts/r8a73a4.dtsi index 6c26caa880f2..658fcc537576 100644 --- a/arch/arm/boot/dts/r8a73a4.dtsi +++ b/arch/arm/boot/dts/r8a73a4.dtsi @@ -193,7 +193,7 @@ }; sdhi0: sdhi@ee100000 { - compatible = "renesas,r8a73a4-sdhi"; + compatible = "renesas,sdhi-r8a73a4"; reg = <0 0xee100000 0 0x100>; interrupt-parent = <&gic>; interrupts = <0 165 4>; @@ -202,7 +202,7 @@ }; sdhi1: sdhi@ee120000 { - compatible = "renesas,r8a73a4-sdhi"; + compatible = "renesas,sdhi-r8a73a4"; reg = <0 0xee120000 0 0x100>; interrupt-parent = <&gic>; interrupts = <0 166 4>; @@ -211,7 +211,7 @@ }; sdhi2: sdhi@ee140000 { - compatible = "renesas,r8a73a4-sdhi"; + compatible = "renesas,sdhi-r8a73a4"; reg = <0 0xee140000 0 0x100>; interrupt-parent = <&gic>; interrupts = <0 167 4>; diff --git a/arch/arm/boot/dts/r8a7778.dtsi b/arch/arm/boot/dts/r8a7778.dtsi index 45ac404ab6d8..3577aba82583 100644 --- a/arch/arm/boot/dts/r8a7778.dtsi +++ b/arch/arm/boot/dts/r8a7778.dtsi @@ -96,6 +96,5 @@ pfc: pfc@fffc0000 { compatible = "renesas,pfc-r8a7778"; reg = <0xfffc000 0x118>; - #gpio-range-cells = <3>; }; }; diff --git a/arch/arm/boot/dts/r8a7779.dtsi b/arch/arm/boot/dts/r8a7779.dtsi index 23a62447359c..ebbe507fcbfa 100644 --- a/arch/arm/boot/dts/r8a7779.dtsi +++ b/arch/arm/boot/dts/r8a7779.dtsi @@ -188,7 +188,6 @@ pfc: pfc@fffc0000 { compatible = "renesas,pfc-r8a7779"; reg = <0xfffc0000 0x23c>; - #gpio-range-cells = <3>; }; thermal@ffc48000 { diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi index 3b879e7c697c..413b4c29e782 100644 --- a/arch/arm/boot/dts/r8a7790.dtsi +++ b/arch/arm/boot/dts/r8a7790.dtsi @@ -148,11 +148,10 @@ pfc: pfc@e6060000 { compatible = "renesas,pfc-r8a7790"; reg = <0 0xe6060000 0 0x250>; - #gpio-range-cells = <3>; }; sdhi0: sdhi@ee100000 { - compatible = "renesas,r8a7790-sdhi"; + compatible = "renesas,sdhi-r8a7790"; reg = <0 0xee100000 0 0x100>; interrupt-parent = <&gic>; interrupts = <0 165 4>; @@ -161,7 +160,7 @@ }; sdhi1: sdhi@ee120000 { - compatible = "renesas,r8a7790-sdhi"; + compatible = "renesas,sdhi-r8a7790"; reg = <0 0xee120000 0 0x100>; interrupt-parent = <&gic>; interrupts = <0 166 4>; @@ -170,7 +169,7 @@ }; sdhi2: sdhi@ee140000 { - compatible = "renesas,r8a7790-sdhi"; + compatible = "renesas,sdhi-r8a7790"; reg = <0 0xee140000 0 0x100>; interrupt-parent = <&gic>; interrupts = <0 167 4>; @@ -179,7 +178,7 @@ }; sdhi3: sdhi@ee160000 { - compatible = "renesas,r8a7790-sdhi"; + compatible = "renesas,sdhi-r8a7790"; reg = <0 0xee160000 0 0x100>; interrupt-parent = <&gic>; interrupts = <0 168 4>; diff --git a/arch/arm/boot/dts/sh73a0.dtsi b/arch/arm/boot/dts/sh73a0.dtsi index ba59a5875a10..3955c7606a6f 100644 --- a/arch/arm/boot/dts/sh73a0.dtsi +++ b/arch/arm/boot/dts/sh73a0.dtsi @@ -196,7 +196,7 @@ }; sdhi0: sdhi@ee100000 { - compatible = "renesas,r8a7740-sdhi"; + compatible = "renesas,sdhi-r8a7740"; reg = <0xee100000 0x100>; interrupt-parent = <&gic>; interrupts = <0 83 4 @@ -208,7 +208,7 @@ /* SDHI1 and SDHI2 have no CD pins, no need for CD IRQ */ sdhi1: sdhi@ee120000 { - compatible = "renesas,r8a7740-sdhi"; + compatible = "renesas,sdhi-r8a7740"; reg = <0xee120000 0x100>; interrupt-parent = <&gic>; interrupts = <0 88 4 @@ -219,7 +219,7 @@ }; sdhi2: sdhi@ee140000 { - compatible = "renesas,r8a7740-sdhi"; + compatible = "renesas,sdhi-r8a7740"; reg = <0xee140000 0x100>; interrupt-parent = <&gic>; interrupts = <0 104 4 diff --git a/arch/arm/boot/install.sh b/arch/arm/boot/install.sh index 06ea7d42ce8e..2a45092a40e3 100644 --- a/arch/arm/boot/install.sh +++ b/arch/arm/boot/install.sh @@ -20,6 +20,20 @@ # $4 - default install path (blank if root directory) # +verify () { + if [ ! -f "$1" ]; then + echo "" 1>&2 + echo " *** Missing file: $1" 1>&2 + echo ' *** You need to run "make" before "make install".' 1>&2 + echo "" 1>&2 + exit 1 + fi +} + +# Make sure the files actually exist +verify "$2" +verify "$3" + # User may have a custom install script if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c index 117f955a2a06..8e1a0245907f 100644 --- a/arch/arm/common/edma.c +++ b/arch/arm/common/edma.c @@ -269,6 +269,11 @@ static const struct edmacc_param dummy_paramset = { .ccnt = 1, }; +static const struct of_device_id edma_of_ids[] = { + { .compatible = "ti,edma3", }, + {} +}; + /*****************************************************************************/ static void map_dmach_queue(unsigned ctlr, unsigned ch_no, @@ -560,14 +565,38 @@ static int reserve_contiguous_slots(int ctlr, unsigned int id, static int prepare_unused_channel_list(struct device *dev, void *data) { struct platform_device *pdev = to_platform_device(dev); - int i, ctlr; + int i, count, ctlr; + struct of_phandle_args dma_spec; + if (dev->of_node) { + count = of_property_count_strings(dev->of_node, "dma-names"); + if (count < 0) + return 0; + for (i = 0; i < count; i++) { + if (of_parse_phandle_with_args(dev->of_node, "dmas", + "#dma-cells", i, + &dma_spec)) + continue; + + if (!of_match_node(edma_of_ids, dma_spec.np)) { + of_node_put(dma_spec.np); + continue; + } + + clear_bit(EDMA_CHAN_SLOT(dma_spec.args[0]), + edma_cc[0]->edma_unused); + of_node_put(dma_spec.np); + } + return 0; + } + + /* For non-OF case */ for (i = 0; i < pdev->num_resources; i++) { if ((pdev->resource[i].flags & IORESOURCE_DMA) && (int)pdev->resource[i].start >= 0) { ctlr = EDMA_CTLR(pdev->resource[i].start); clear_bit(EDMA_CHAN_SLOT(pdev->resource[i].start), - edma_cc[ctlr]->edma_unused); + edma_cc[ctlr]->edma_unused); } } @@ -1762,11 +1791,6 @@ static int edma_probe(struct platform_device *pdev) return 0; } -static const struct of_device_id edma_of_ids[] = { - { .compatible = "ti,edma3", }, - {} -}; - static struct platform_driver edma_driver = { .driver = { .name = "edma", diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c index 370236dd1a03..990250965f2c 100644 --- a/arch/arm/common/mcpm_entry.c +++ b/arch/arm/common/mcpm_entry.c @@ -51,7 +51,8 @@ void mcpm_cpu_power_down(void) { phys_reset_t phys_reset; - BUG_ON(!platform_ops); + if (WARN_ON_ONCE(!platform_ops || !platform_ops->power_down)) + return; BUG_ON(!irqs_disabled()); /* @@ -93,7 +94,8 @@ void mcpm_cpu_suspend(u64 expected_residency) { phys_reset_t phys_reset; - BUG_ON(!platform_ops); + if (WARN_ON_ONCE(!platform_ops || !platform_ops->suspend)) + return; BUG_ON(!irqs_disabled()); /* Very similar to mcpm_cpu_power_down() */ diff --git a/arch/arm/common/sharpsl_param.c b/arch/arm/common/sharpsl_param.c index d56c932580eb..025f6ce38596 100644 --- a/arch/arm/common/sharpsl_param.c +++ b/arch/arm/common/sharpsl_param.c @@ -15,6 +15,7 @@ #include <linux/module.h> #include <linux/string.h> #include <asm/mach/sharpsl_param.h> +#include <asm/memory.h> /* * Certain hardware parameters determined at the time of device manufacture, @@ -25,8 +26,10 @@ */ #ifdef CONFIG_ARCH_SA1100 #define PARAM_BASE 0xe8ffc000 +#define param_start(x) (void *)(x) #else #define PARAM_BASE 0xa0000a00 +#define param_start(x) __va(x) #endif #define MAGIC_CHG(a,b,c,d) ( ( d << 24 ) | ( c << 16 ) | ( b << 8 ) | a ) @@ -41,7 +44,7 @@ EXPORT_SYMBOL(sharpsl_param); void sharpsl_save_param(void) { - memcpy(&sharpsl_param, (void *)PARAM_BASE, sizeof(struct sharpsl_param_info)); + memcpy(&sharpsl_param, param_start(PARAM_BASE), sizeof(struct sharpsl_param_info)); if (sharpsl_param.comadj_keyword != COMADJ_MAGIC) sharpsl_param.comadj=-1; diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index f3935b46df29..119fc378fc52 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -135,6 +135,7 @@ CONFIG_MMC=y CONFIG_MMC_ARMMMCI=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y +CONFIG_MMC_SDHCI_ESDHC_IMX=y CONFIG_MMC_SDHCI_TEGRA=y CONFIG_MMC_SDHCI_SPEAR=y CONFIG_MMC_OMAP=y diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S index 19d6cd6f29f9..3a14ea8fe97e 100644 --- a/arch/arm/crypto/aes-armv4.S +++ b/arch/arm/crypto/aes-armv4.S @@ -148,7 +148,7 @@ AES_Te: @ const AES_KEY *key) { .align 5 ENTRY(AES_encrypt) - sub r3,pc,#8 @ AES_encrypt + adr r3,AES_encrypt stmdb sp!,{r1,r4-r12,lr} mov r12,r0 @ inp mov r11,r2 @@ -381,7 +381,7 @@ _armv4_AES_encrypt: .align 5 ENTRY(private_AES_set_encrypt_key) _armv4_AES_set_encrypt_key: - sub r3,pc,#8 @ AES_set_encrypt_key + adr r3,_armv4_AES_set_encrypt_key teq r0,#0 moveq r0,#-1 beq .Labrt @@ -843,7 +843,7 @@ AES_Td: @ const AES_KEY *key) { .align 5 ENTRY(AES_decrypt) - sub r3,pc,#8 @ AES_decrypt + adr r3,AES_decrypt stmdb sp!,{r1,r4-r12,lr} mov r12,r0 @ inp mov r11,r2 diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index d3db39860b9c..59ceae8f3c95 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -31,5 +31,4 @@ generic-y += termbits.h generic-y += termios.h generic-y += timex.h generic-y += trace_clock.h -generic-y += types.h generic-y += unaligned.h diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h index bfc198c75913..863c892b4aaa 100644 --- a/arch/arm/include/asm/jump_label.h +++ b/arch/arm/include/asm/jump_label.h @@ -16,7 +16,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:\n\t" + asm_volatile_goto("1:\n\t" JUMP_LABEL_NOP "\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 64e96960de29..1d3153c7eb41 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -57,6 +57,7 @@ * TSC: Trap SMC * TSW: Trap cache operations by set/way * TWI: Trap WFI + * TWE: Trap WFE * TIDCP: Trap L2CTLR/L2ECTLR * BSU_IS: Upgrade barriers to the inner shareable domain * FB: Force broadcast of all maintainance operations @@ -67,7 +68,7 @@ */ #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ - HCR_SWIO | HCR_TIDCP) + HCR_TWE | HCR_SWIO | HCR_TIDCP) #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) /* System Control Register (SCTLR) bits */ @@ -95,12 +96,12 @@ #define TTBCR_IRGN1 (3 << 24) #define TTBCR_EPD1 (1 << 23) #define TTBCR_A1 (1 << 22) -#define TTBCR_T1SZ (3 << 16) +#define TTBCR_T1SZ (7 << 16) #define TTBCR_SH0 (3 << 12) #define TTBCR_ORGN0 (3 << 10) #define TTBCR_IRGN0 (3 << 8) #define TTBCR_EPD0 (1 << 7) -#define TTBCR_T0SZ 3 +#define TTBCR_T0SZ (7 << 0) #define HTCR_MASK (TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0) /* Hyp System Trap Register */ @@ -208,6 +209,8 @@ #define HSR_EC_DABT (0x24) #define HSR_EC_DABT_HYP (0x25) +#define HSR_WFI_IS_WFE (1U << 0) + #define HSR_HVC_IMM_MASK ((1UL << 16) - 1) #define HSR_DABT_S1PTW (1U << 7) diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index a2f43ddcc300..661da11f76f4 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -39,7 +39,7 @@ #define c6_IFAR 17 /* Instruction Fault Address Register */ #define c7_PAR 18 /* Physical Address Register */ #define c7_PAR_high 19 /* PAR top 32 bits */ -#define c9_L2CTLR 20 /* Cortex A15 L2 Control Register */ +#define c9_L2CTLR 20 /* Cortex A15/A7 L2 Control Register */ #define c10_PRRR 21 /* Primary Region Remap Register */ #define c10_NMRR 22 /* Normal Memory Remap Register */ #define c12_VBAR 23 /* Vector Base Address Register */ diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index e844b335e33a..0fa90c962ac8 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -157,6 +157,11 @@ static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK; } +static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.cp15[c0_MPIDR]; +} + static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) { *vcpu_cpsr(vcpu) |= PSR_E_BIT; diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 7d22517d8071..8a6f6db14ee4 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -38,11 +38,6 @@ #define KVM_VCPU_MAX_FEATURES 1 -/* We don't currently support large pages. */ -#define KVM_HPAGE_GFN_SHIFT(x) 0 -#define KVM_NR_PAGE_SIZES 1 -#define KVM_PAGES_PER_HPAGE(x) (1UL<<31) - #include <kvm/arm_vgic.h> struct kvm_vcpu; @@ -154,6 +149,7 @@ struct kvm_vcpu_stat { struct kvm_vcpu_init; int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init); +int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); struct kvm_one_reg; diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 9b28c41f4ba9..77de4a41cc50 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -62,6 +62,12 @@ phys_addr_t kvm_get_idmap_vector(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); +static inline void kvm_set_pmd(pmd_t *pmd, pmd_t new_pmd) +{ + *pmd = new_pmd; + flush_pmd_entry(pmd); +} + static inline void kvm_set_pte(pte_t *pte, pte_t new_pte) { *pte = new_pte; @@ -103,9 +109,15 @@ static inline void kvm_set_s2pte_writable(pte_t *pte) pte_val(*pte) |= L_PTE_S2_RDWR; } +static inline void kvm_set_s2pmd_writable(pmd_t *pmd) +{ + pmd_val(*pmd) |= L_PMD_S2_RDWR; +} + struct kvm; -static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) +static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, + unsigned long size) { /* * If we are going to insert an instruction page and the icache is @@ -120,8 +132,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) * need any kind of flushing (DDI 0406C.b - Page B3-1392). */ if (icache_is_pipt()) { - unsigned long hva = gfn_to_hva(kvm, gfn); - __cpuc_coherent_user_range(hva, hva + PAGE_SIZE); + __cpuc_coherent_user_range(hva, hva + size); } else if (!icache_is_vivt_asid_tagged()) { /* any kind of VIPT cache */ __flush_icache_all(); diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h index 0f7b7620e9a5..fc82a88f5b69 100644 --- a/arch/arm/include/asm/mcpm.h +++ b/arch/arm/include/asm/mcpm.h @@ -76,8 +76,11 @@ int mcpm_cpu_power_up(unsigned int cpu, unsigned int cluster); * * This must be called with interrupts disabled. * - * This does not return. Re-entry in the kernel is expected via - * mcpm_entry_point. + * On success this does not return. Re-entry in the kernel is expected + * via mcpm_entry_point. + * + * This will return if mcpm_platform_register() has not been called + * previously in which case the caller should take appropriate action. */ void mcpm_cpu_power_down(void); @@ -98,8 +101,11 @@ void mcpm_cpu_power_down(void); * * This must be called with interrupts disabled. * - * This does not return. Re-entry in the kernel is expected via - * mcpm_entry_point. + * On success this does not return. Re-entry in the kernel is expected + * via mcpm_entry_point. + * + * This will return if mcpm_platform_register() has not been called + * previously in which case the caller should take appropriate action. */ void mcpm_cpu_suspend(u64 expected_residency); diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 5689c18c85f5..a331d2527342 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -126,6 +126,8 @@ #define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ #define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ +#define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ + /* * Hyp-mode PL2 PTE definitions for LPAE. */ diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h index f1d96d4e8092..73ddd7239b33 100644 --- a/arch/arm/include/asm/syscall.h +++ b/arch/arm/include/asm/syscall.h @@ -57,6 +57,9 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned int i, unsigned int n, unsigned long *args) { + if (n == 0) + return; + if (i + n > SYSCALL_MAX_ARGS) { unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i; unsigned int n_bad = n + i - SYSCALL_MAX_ARGS; @@ -81,6 +84,9 @@ static inline void syscall_set_arguments(struct task_struct *task, unsigned int i, unsigned int n, const unsigned long *args) { + if (n == 0) + return; + if (i + n > SYSCALL_MAX_ARGS) { pr_warning("%s called with max args %d, handling only %d\n", __func__, i + n, SYSCALL_MAX_ARGS); diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 7e1f76027f66..72abdc541f38 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -19,6 +19,13 @@ #include <asm/unified.h> #include <asm/compiler.h> +#if __LINUX_ARM_ARCH__ < 6 +#include <asm-generic/uaccess-unaligned.h> +#else +#define __get_user_unaligned __get_user +#define __put_user_unaligned __put_user +#endif + #define VERIFY_READ 0 #define VERIFY_WRITE 1 diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index c1ee007523d7..c498b60c0505 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -63,7 +63,8 @@ struct kvm_regs { /* Supported Processor Types */ #define KVM_ARM_TARGET_CORTEX_A15 0 -#define KVM_ARM_NUM_TARGETS 1 +#define KVM_ARM_TARGET_CORTEX_A7 1 +#define KVM_ARM_NUM_TARGETS 2 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 74ad15d1a065..bc6bd9683ba4 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -442,10 +442,10 @@ local_restart: ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine add r1, sp, #S_OFF - cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE) +2: cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE) eor r0, scno, #__NR_SYSCALL_BASE @ put OS number back bcs arm_syscall -2: mov why, #0 @ no longer a real syscall + mov why, #0 @ no longer a real syscall b sys_ni_syscall @ not private func #if defined(CONFIG_OABI_COMPAT) || !defined(CONFIG_AEABI) diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index de23a9beed13..39f89fbd5111 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -329,10 +329,10 @@ #ifdef CONFIG_CONTEXT_TRACKING .if \save stmdb sp!, {r0-r3, ip, lr} - bl user_exit + bl context_tracking_user_exit ldmia sp!, {r0-r3, ip, lr} .else - bl user_exit + bl context_tracking_user_exit .endif #endif .endm @@ -341,10 +341,10 @@ #ifdef CONFIG_CONTEXT_TRACKING .if \save stmdb sp!, {r0-r3, ip, lr} - bl user_enter + bl context_tracking_user_enter ldmia sp!, {r0-r3, ip, lr} .else - bl user_enter + bl context_tracking_user_enter .endif #endif .endm diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 2c7cc1e03473..476de57dcef2 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -487,7 +487,26 @@ __fixup_smp: mrc p15, 0, r0, c0, c0, 5 @ read MPIDR and r0, r0, #0xc0000000 @ multiprocessing extensions and teq r0, #0x80000000 @ not part of a uniprocessor system? - moveq pc, lr @ yes, assume SMP + bne __fixup_smp_on_up @ no, assume UP + + @ Core indicates it is SMP. Check for Aegis SOC where a single + @ Cortex-A9 CPU is present but SMP operations fault. + mov r4, #0x41000000 + orr r4, r4, #0x0000c000 + orr r4, r4, #0x00000090 + teq r3, r4 @ Check for ARM Cortex-A9 + movne pc, lr @ Not ARM Cortex-A9, + + @ If a future SoC *does* use 0x0 as the PERIPH_BASE, then the + @ below address check will need to be #ifdef'd or equivalent + @ for the Aegis platform. + mrc p15, 4, r0, c15, c0 @ get SCU base address + teq r0, #0x0 @ '0' on actual UP A9 hardware + beq __fixup_smp_on_up @ So its an A9 UP + ldr r0, [r0, #4] @ read SCU Config + and r0, r0, #0x3 @ number of CPUs + teq r0, #0x0 @ is 1? + movne pc, lr __fixup_smp_on_up: adr r0, 1f diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index ebf5015508b5..466bd299b1a8 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -20,6 +20,7 @@ config KVM bool "Kernel-based Virtual Machine (KVM) support" select PREEMPT_NOTIFIERS select ANON_INODES + select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_ARM_HOST depends on ARM_VIRT_EXT && ARM_LPAE diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index d99bee4950e5..789bca9e64a7 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -19,6 +19,6 @@ kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o -obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o +obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 9c697db2787e..e312e4a53f8d 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -152,12 +152,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } @@ -797,6 +798,19 @@ long kvm_arch_vm_ioctl(struct file *filp, return -EFAULT; return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr); } + case KVM_ARM_PREFERRED_TARGET: { + int err; + struct kvm_vcpu_init init; + + err = kvm_vcpu_preferred_target(&init); + if (err) + return err; + + if (copy_to_user(argp, &init, sizeof(init))) + return -EFAULT; + + return 0; + } default: return -EINVAL; } diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index db9cf692d4dd..78c0885d6501 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -71,6 +71,98 @@ int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } +static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) +{ + /* + * Compute guest MPIDR. We build a virtual cluster out of the + * vcpu_id, but we read the 'U' bit from the underlying + * hardware directly. + */ + vcpu->arch.cp15[c0_MPIDR] = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) | + ((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) | + (vcpu->vcpu_id & 3)); +} + +/* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */ +static bool access_actlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR]; + return true; +} + +/* TRM entries A7:4.3.56, A15:4.3.60 - R/O. */ +static bool access_cbar(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return write_to_read_only(vcpu, p); + return read_zero(vcpu, p); +} + +/* TRM entries A7:4.3.49, A15:4.3.48 - R/O WI */ +static bool access_l2ctlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR]; + return true; +} + +static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) +{ + u32 l2ctlr, ncores; + + asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); + l2ctlr &= ~(3 << 24); + ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1; + /* How many cores in the current cluster and the next ones */ + ncores -= (vcpu->vcpu_id & ~3); + /* Cap it to the maximum number of cores in a single cluster */ + ncores = min(ncores, 3U); + l2ctlr |= (ncores & 3) << 24; + + vcpu->arch.cp15[c9_L2CTLR] = l2ctlr; +} + +static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) +{ + u32 actlr; + + /* ACTLR contains SMP bit: make sure you create all cpus first! */ + asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr)); + /* Make the SMP bit consistent with the guest configuration */ + if (atomic_read(&vcpu->kvm->online_vcpus) > 1) + actlr |= 1U << 6; + else + actlr &= ~(1U << 6); + + vcpu->arch.cp15[c1_ACTLR] = actlr; +} + +/* + * TRM entries: A7:4.3.50, A15:4.3.49 + * R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). + */ +static bool access_l2ectlr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p->Rt1) = 0; + return true; +} + /* See note at ARM ARM B1.14.4 */ static bool access_dcsw(struct kvm_vcpu *vcpu, const struct coproc_params *p, @@ -153,10 +245,22 @@ static bool pm_fake(struct kvm_vcpu *vcpu, * registers preceding 32-bit ones. */ static const struct coproc_reg cp15_regs[] = { + /* MPIDR: we use VMPIDR for guest access. */ + { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32, + NULL, reset_mpidr, c0_MPIDR }, + /* CSSELR: swapped by interrupt.S. */ { CRn( 0), CRm( 0), Op1( 2), Op2( 0), is32, NULL, reset_unknown, c0_CSSELR }, + /* ACTLR: trapped by HCR.TAC bit. */ + { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32, + access_actlr, reset_actlr, c1_ACTLR }, + + /* CPACR: swapped by interrupt.S. */ + { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32, + NULL, reset_val, c1_CPACR, 0x00000000 }, + /* TTBR0/TTBR1: swapped by interrupt.S. */ { CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 }, { CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 }, @@ -195,6 +299,13 @@ static const struct coproc_reg cp15_regs[] = { { CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, access_dcsw}, { CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, access_dcsw}, /* + * L2CTLR access (guest wants to know #CPUs). + */ + { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32, + access_l2ctlr, reset_l2ctlr, c9_L2CTLR }, + { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr}, + + /* * Dummy performance monitor implementation. */ { CRn( 9), CRm(12), Op1( 0), Op2( 0), is32, access_pmcr}, @@ -234,6 +345,9 @@ static const struct coproc_reg cp15_regs[] = { /* CNTKCTL: swapped by interrupt.S. */ { CRn(14), CRm( 1), Op1( 0), Op2( 0), is32, NULL, reset_val, c14_CNTKCTL, 0x00000000 }, + + /* The Configuration Base Address Register. */ + { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, }; /* Target specific emulation tables */ @@ -241,6 +355,12 @@ static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS]; void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table) { + unsigned int i; + + for (i = 1; i < table->num; i++) + BUG_ON(cmp_reg(&table->table[i-1], + &table->table[i]) >= 0); + target_tables[table->target] = table; } diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c index cf93472b9dd6..bb0cac1410cc 100644 --- a/arch/arm/kvm/coproc_a15.c +++ b/arch/arm/kvm/coproc_a15.c @@ -17,101 +17,12 @@ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include <linux/kvm_host.h> -#include <asm/cputype.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_host.h> -#include <asm/kvm_emulate.h> #include <asm/kvm_coproc.h> +#include <asm/kvm_emulate.h> #include <linux/init.h> -static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) -{ - /* - * Compute guest MPIDR: - * (Even if we present only one VCPU to the guest on an SMP - * host we don't set the U bit in the MPIDR, or vice versa, as - * revealing the underlying hardware properties is likely to - * be the best choice). - */ - vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & ~MPIDR_LEVEL_MASK) - | (vcpu->vcpu_id & MPIDR_LEVEL_MASK); -} - #include "coproc.h" -/* A15 TRM 4.3.28: RO WI */ -static bool access_actlr(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return ignore_write(vcpu, p); - - *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR]; - return true; -} - -/* A15 TRM 4.3.60: R/O. */ -static bool access_cbar(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return write_to_read_only(vcpu, p); - return read_zero(vcpu, p); -} - -/* A15 TRM 4.3.48: R/O WI. */ -static bool access_l2ctlr(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return ignore_write(vcpu, p); - - *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR]; - return true; -} - -static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) -{ - u32 l2ctlr, ncores; - - asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); - l2ctlr &= ~(3 << 24); - ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1; - l2ctlr |= (ncores & 3) << 24; - - vcpu->arch.cp15[c9_L2CTLR] = l2ctlr; -} - -static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) -{ - u32 actlr; - - /* ACTLR contains SMP bit: make sure you create all cpus first! */ - asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr)); - /* Make the SMP bit consistent with the guest configuration */ - if (atomic_read(&vcpu->kvm->online_vcpus) > 1) - actlr |= 1U << 6; - else - actlr &= ~(1U << 6); - - vcpu->arch.cp15[c1_ACTLR] = actlr; -} - -/* A15 TRM 4.3.49: R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). */ -static bool access_l2ectlr(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - if (p->is_write) - return ignore_write(vcpu, p); - - *vcpu_reg(vcpu, p->Rt1) = 0; - return true; -} - /* * A15-specific CP15 registers. * CRn denotes the primary register number, but is copied to the CRm in the @@ -121,29 +32,9 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu, * registers preceding 32-bit ones. */ static const struct coproc_reg a15_regs[] = { - /* MPIDR: we use VMPIDR for guest access. */ - { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32, - NULL, reset_mpidr, c0_MPIDR }, - /* SCTLR: swapped by interrupt.S. */ { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, NULL, reset_val, c1_SCTLR, 0x00C50078 }, - /* ACTLR: trapped by HCR.TAC bit. */ - { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32, - access_actlr, reset_actlr, c1_ACTLR }, - /* CPACR: swapped by interrupt.S. */ - { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32, - NULL, reset_val, c1_CPACR, 0x00000000 }, - - /* - * L2CTLR access (guest wants to know #CPUs). - */ - { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32, - access_l2ctlr, reset_l2ctlr, c9_L2CTLR }, - { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr}, - - /* The Configuration Base Address Register. */ - { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, }; static struct kvm_coproc_target_table a15_target_table = { @@ -154,12 +45,6 @@ static struct kvm_coproc_target_table a15_target_table = { static int __init coproc_a15_init(void) { - unsigned int i; - - for (i = 1; i < ARRAY_SIZE(a15_regs); i++) - BUG_ON(cmp_reg(&a15_regs[i-1], - &a15_regs[i]) >= 0); - kvm_register_target_coproc_table(&a15_target_table); return 0; } diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c new file mode 100644 index 000000000000..1df767331588 --- /dev/null +++ b/arch/arm/kvm/coproc_a7.c @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Copyright (C) 2013 - ARM Ltd + * + * Authors: Rusty Russell <rusty@rustcorp.au> + * Christoffer Dall <c.dall@virtualopensystems.com> + * Jonathan Austin <jonathan.austin@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include <linux/kvm_host.h> +#include <asm/kvm_coproc.h> +#include <asm/kvm_emulate.h> +#include <linux/init.h> + +#include "coproc.h" + +/* + * Cortex-A7 specific CP15 registers. + * CRn denotes the primary register number, but is copied to the CRm in the + * user space API for 64-bit register access in line with the terminology used + * in the ARM ARM. + * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit + * registers preceding 32-bit ones. + */ +static const struct coproc_reg a7_regs[] = { + /* SCTLR: swapped by interrupt.S. */ + { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, + NULL, reset_val, c1_SCTLR, 0x00C50878 }, +}; + +static struct kvm_coproc_target_table a7_target_table = { + .target = KVM_ARM_TARGET_CORTEX_A7, + .table = a7_regs, + .num = ARRAY_SIZE(a7_regs), +}; + +static int __init coproc_a7_init(void) +{ + kvm_register_target_coproc_table(&a7_target_table); + return 0; +} +late_initcall(coproc_a7_init); diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index bdede9e7da51..d6c005283678 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c @@ -354,7 +354,7 @@ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr) *vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset; if (is_pabt) { - /* Set DFAR and DFSR */ + /* Set IFAR and IFSR */ vcpu->arch.cp15[c6_IFAR] = addr; is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31); /* Always give debug fault for now - should give guest a clue */ diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 152d03612181..20f8d97904af 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -190,6 +190,8 @@ int __attribute_const__ kvm_target_cpu(void) return -EINVAL; switch (part_number) { + case ARM_CPU_PART_CORTEX_A7: + return KVM_ARM_TARGET_CORTEX_A7; case ARM_CPU_PART_CORTEX_A15: return KVM_ARM_TARGET_CORTEX_A15; default: @@ -202,7 +204,7 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, { unsigned int i; - /* We can only do a cortex A15 for now. */ + /* We can only cope with guest==host and only on A15/A7 (for now). */ if (init->target != kvm_target_cpu()) return -EINVAL; @@ -222,6 +224,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, return kvm_reset_vcpu(vcpu); } +int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) +{ + int target = kvm_target_cpu(); + + if (target < 0) + return -ENODEV; + + memset(init, 0, sizeof(*init)); + + /* + * For now, we don't return any features. + * In future, we might use features to return target + * specific features available for the preferred + * target type. + */ + init->target = (__u32)target; + + return 0; +} + int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { return -EINVAL; diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index df4c82d47ad7..a92079011a83 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -73,23 +73,29 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) } /** - * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest + * kvm_handle_wfx - handle a WFI or WFE instructions trapped in guests * @vcpu: the vcpu pointer * @run: the kvm_run structure pointer * - * Simply sets the wait_for_interrupts flag on the vcpu structure, which will - * halt execution of world-switches and schedule other host processes until - * there is an incoming IRQ or FIQ to the VM. + * WFE: Yield the CPU and come back to this vcpu when the scheduler + * decides to. + * WFI: Simply call kvm_vcpu_block(), which will halt execution of + * world-switches and schedule other host processes until there is an + * incoming IRQ or FIQ to the VM. */ -static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) +static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) { trace_kvm_wfi(*vcpu_pc(vcpu)); - kvm_vcpu_block(vcpu); + if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) + kvm_vcpu_on_spin(vcpu); + else + kvm_vcpu_block(vcpu); + return 1; } static exit_handle_fn arm_exit_handlers[] = { - [HSR_EC_WFI] = kvm_handle_wfi, + [HSR_EC_WFI] = kvm_handle_wfx, [HSR_EC_CP15_32] = kvm_handle_cp15_32, [HSR_EC_CP15_64] = kvm_handle_cp15_64, [HSR_EC_CP14_MR] = kvm_handle_cp14_access, diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index b0de86b56c13..371958370de4 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -19,6 +19,7 @@ #include <linux/mman.h> #include <linux/kvm_host.h> #include <linux/io.h> +#include <linux/hugetlb.h> #include <trace/events/kvm.h> #include <asm/pgalloc.h> #include <asm/cacheflush.h> @@ -41,6 +42,8 @@ static unsigned long hyp_idmap_start; static unsigned long hyp_idmap_end; static phys_addr_t hyp_idmap_vector; +#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) + static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { /* @@ -93,19 +96,29 @@ static bool page_empty(void *ptr) static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) { - pmd_t *pmd_table = pmd_offset(pud, 0); - pud_clear(pud); - kvm_tlb_flush_vmid_ipa(kvm, addr); - pmd_free(NULL, pmd_table); + if (pud_huge(*pud)) { + pud_clear(pud); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } else { + pmd_t *pmd_table = pmd_offset(pud, 0); + pud_clear(pud); + kvm_tlb_flush_vmid_ipa(kvm, addr); + pmd_free(NULL, pmd_table); + } put_page(virt_to_page(pud)); } static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) { - pte_t *pte_table = pte_offset_kernel(pmd, 0); - pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); - pte_free_kernel(NULL, pte_table); + if (kvm_pmd_huge(*pmd)) { + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } else { + pte_t *pte_table = pte_offset_kernel(pmd, 0); + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + pte_free_kernel(NULL, pte_table); + } put_page(virt_to_page(pmd)); } @@ -136,18 +149,32 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp, continue; } + if (pud_huge(*pud)) { + /* + * If we are dealing with a huge pud, just clear it and + * move on. + */ + clear_pud_entry(kvm, pud, addr); + addr = pud_addr_end(addr, end); + continue; + } + pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) { addr = pmd_addr_end(addr, end); continue; } - pte = pte_offset_kernel(pmd, addr); - clear_pte_entry(kvm, pte, addr); - next = addr + PAGE_SIZE; + if (!kvm_pmd_huge(*pmd)) { + pte = pte_offset_kernel(pmd, addr); + clear_pte_entry(kvm, pte, addr); + next = addr + PAGE_SIZE; + } - /* If we emptied the pte, walk back up the ladder */ - if (page_empty(pte)) { + /* + * If the pmd entry is to be cleared, walk back up the ladder + */ + if (kvm_pmd_huge(*pmd) || page_empty(pte)) { clear_pmd_entry(kvm, pmd, addr); next = pmd_addr_end(addr, end); if (page_empty(pmd) && !page_empty(pud)) { @@ -420,29 +447,71 @@ void kvm_free_stage2_pgd(struct kvm *kvm) kvm->arch.pgd = NULL; } - -static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, - phys_addr_t addr, const pte_t *new_pte, bool iomap) +static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; - pte_t *pte, old_pte; - /* Create 2nd stage page table mapping - Level 1 */ pgd = kvm->arch.pgd + pgd_index(addr); pud = pud_offset(pgd, addr); if (pud_none(*pud)) { if (!cache) - return 0; /* ignore calls from kvm_set_spte_hva */ + return NULL; pmd = mmu_memory_cache_alloc(cache); pud_populate(NULL, pud, pmd); get_page(virt_to_page(pud)); } - pmd = pmd_offset(pud, addr); + return pmd_offset(pud, addr); +} + +static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache + *cache, phys_addr_t addr, const pmd_t *new_pmd) +{ + pmd_t *pmd, old_pmd; + + pmd = stage2_get_pmd(kvm, cache, addr); + VM_BUG_ON(!pmd); + + /* + * Mapping in huge pages should only happen through a fault. If a + * page is merged into a transparent huge page, the individual + * subpages of that huge page should be unmapped through MMU + * notifiers before we get here. + * + * Merging of CompoundPages is not supported; they should become + * splitting first, unmapped, merged, and mapped back in on-demand. + */ + VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd)); + + old_pmd = *pmd; + kvm_set_pmd(pmd, *new_pmd); + if (pmd_present(old_pmd)) + kvm_tlb_flush_vmid_ipa(kvm, addr); + else + get_page(virt_to_page(pmd)); + return 0; +} + +static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr, const pte_t *new_pte, bool iomap) +{ + pmd_t *pmd; + pte_t *pte, old_pte; - /* Create 2nd stage page table mapping - Level 2 */ + /* Create stage-2 page table mapping - Level 1 */ + pmd = stage2_get_pmd(kvm, cache, addr); + if (!pmd) { + /* + * Ignore calls from kvm_set_spte_hva for unallocated + * address ranges. + */ + return 0; + } + + /* Create stage-2 page mappings - Level 2 */ if (pmd_none(*pmd)) { if (!cache) return 0; /* ignore calls from kvm_set_spte_hva */ @@ -507,16 +576,60 @@ out: return ret; } +static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap) +{ + pfn_t pfn = *pfnp; + gfn_t gfn = *ipap >> PAGE_SHIFT; + + if (PageTransCompound(pfn_to_page(pfn))) { + unsigned long mask; + /* + * The address we faulted on is backed by a transparent huge + * page. However, because we map the compound huge page and + * not the individual tail page, we need to transfer the + * refcount to the head page. We have to be careful that the + * THP doesn't start to split while we are adjusting the + * refcounts. + * + * We are sure this doesn't happen, because mmu_notifier_retry + * was successful and we are holding the mmu_lock, so if this + * THP is trying to split, it will be blocked in the mmu + * notifier before touching any of the pages, specifically + * before being able to call __split_huge_page_refcount(). + * + * We can therefore safely transfer the refcount from PG_tail + * to PG_head and switch the pfn from a tail page to the head + * page accordingly. + */ + mask = PTRS_PER_PMD - 1; + VM_BUG_ON((gfn & mask) != (pfn & mask)); + if (pfn & mask) { + *ipap &= PMD_MASK; + kvm_release_pfn_clean(pfn); + pfn &= ~mask; + kvm_get_pfn(pfn); + *pfnp = pfn; + } + + return true; + } + + return false; +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - gfn_t gfn, struct kvm_memory_slot *memslot, + struct kvm_memory_slot *memslot, unsigned long fault_status) { - pte_t new_pte; - pfn_t pfn; int ret; - bool write_fault, writable; + bool write_fault, writable, hugetlb = false, force_pte = false; unsigned long mmu_seq; + gfn_t gfn = fault_ipa >> PAGE_SHIFT; + unsigned long hva = gfn_to_hva(vcpu->kvm, gfn); + struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; + struct vm_area_struct *vma; + pfn_t pfn; write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); if (fault_status == FSC_PERM && !write_fault) { @@ -524,6 +637,26 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } + /* Let's check if we will get back a huge page backed by hugetlbfs */ + down_read(¤t->mm->mmap_sem); + vma = find_vma_intersection(current->mm, hva, hva + 1); + if (is_vm_hugetlb_page(vma)) { + hugetlb = true; + gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; + } else { + /* + * Pages belonging to VMAs not aligned to the PMD mapping + * granularity cannot be mapped using block descriptors even + * if the pages belong to a THP for the process, because the + * stage-2 block descriptor will cover more than a single THP + * and we loose atomicity for unmapping, updates, and splits + * of the THP or other pages in the stage-2 block range. + */ + if (vma->vm_start & ~PMD_MASK) + force_pte = true; + } + up_read(¤t->mm->mmap_sem); + /* We need minimum second+third level pages */ ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS); if (ret) @@ -541,26 +674,40 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, */ smp_rmb(); - pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable); + pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable); if (is_error_pfn(pfn)) return -EFAULT; - new_pte = pfn_pte(pfn, PAGE_S2); - coherent_icache_guest_page(vcpu->kvm, gfn); - - spin_lock(&vcpu->kvm->mmu_lock); - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) + spin_lock(&kvm->mmu_lock); + if (mmu_notifier_retry(kvm, mmu_seq)) goto out_unlock; - if (writable) { - kvm_set_s2pte_writable(&new_pte); - kvm_set_pfn_dirty(pfn); + if (!hugetlb && !force_pte) + hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); + + if (hugetlb) { + pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2); + new_pmd = pmd_mkhuge(new_pmd); + if (writable) { + kvm_set_s2pmd_writable(&new_pmd); + kvm_set_pfn_dirty(pfn); + } + coherent_icache_guest_page(kvm, hva & PMD_MASK, PMD_SIZE); + ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); + } else { + pte_t new_pte = pfn_pte(pfn, PAGE_S2); + if (writable) { + kvm_set_s2pte_writable(&new_pte); + kvm_set_pfn_dirty(pfn); + } + coherent_icache_guest_page(kvm, hva, PAGE_SIZE); + ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false); } - stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false); + out_unlock: - spin_unlock(&vcpu->kvm->mmu_lock); + spin_unlock(&kvm->mmu_lock); kvm_release_pfn_clean(pfn); - return 0; + return ret; } /** @@ -629,7 +776,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) memslot = gfn_to_memslot(vcpu->kvm, gfn); - ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status); + ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status); if (ret == 0) ret = 1; out_unlock: diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index ae0e06b6a492..0881bf169fbc 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -18,6 +18,7 @@ #include <linux/kvm_host.h> #include <linux/wait.h> +#include <asm/cputype.h> #include <asm/kvm_emulate.h> #include <asm/kvm_psci.h> @@ -34,22 +35,30 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) { struct kvm *kvm = source_vcpu->kvm; - struct kvm_vcpu *vcpu; + struct kvm_vcpu *vcpu = NULL, *tmp; wait_queue_head_t *wq; unsigned long cpu_id; + unsigned long mpidr; phys_addr_t target_pc; + int i; cpu_id = *vcpu_reg(source_vcpu, 1); if (vcpu_mode_is_32bit(source_vcpu)) cpu_id &= ~((u32) 0); - if (cpu_id >= atomic_read(&kvm->online_vcpus)) + kvm_for_each_vcpu(i, tmp, kvm) { + mpidr = kvm_vcpu_get_mpidr(tmp); + if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) { + vcpu = tmp; + break; + } + } + + if (!vcpu) return KVM_PSCI_RET_INVAL; target_pc = *vcpu_reg(source_vcpu, 2); - vcpu = kvm_get_vcpu(kvm, cpu_id); - wq = kvm_arch_vcpu_wq(vcpu); if (!waitqueue_active(wq)) return KVM_PSCI_RET_INVAL; diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index 71e08baee209..f558c073c023 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -30,16 +30,14 @@ #include <kvm/arm_arch_timer.h> /****************************************************************************** - * Cortex-A15 Reset Values + * Cortex-A15 and Cortex-A7 Reset Values */ -static const int a15_max_cpu_idx = 3; - -static struct kvm_regs a15_regs_reset = { +static struct kvm_regs cortexa_regs_reset = { .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, }; -static const struct kvm_irq_level a15_vtimer_irq = { +static const struct kvm_irq_level cortexa_vtimer_irq = { { .irq = 27 }, .level = 1, }; @@ -58,23 +56,22 @@ static const struct kvm_irq_level a15_vtimer_irq = { */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { - struct kvm_regs *cpu_reset; + struct kvm_regs *reset_regs; const struct kvm_irq_level *cpu_vtimer_irq; switch (vcpu->arch.target) { + case KVM_ARM_TARGET_CORTEX_A7: case KVM_ARM_TARGET_CORTEX_A15: - if (vcpu->vcpu_id > a15_max_cpu_idx) - return -EINVAL; - cpu_reset = &a15_regs_reset; + reset_regs = &cortexa_regs_reset; vcpu->arch.midr = read_cpuid_id(); - cpu_vtimer_irq = &a15_vtimer_irq; + cpu_vtimer_irq = &cortexa_vtimer_irq; break; default: return -ENODEV; } /* Reset core registers */ - memcpy(&vcpu->arch.regs, cpu_reset, sizeof(vcpu->arch.regs)); + memcpy(&vcpu->arch.regs, reset_regs, sizeof(vcpu->arch.regs)); /* Reset CP15 registers */ kvm_reset_coprocs(vcpu); diff --git a/arch/arm/mach-at91/at91rm9200_time.c b/arch/arm/mach-at91/at91rm9200_time.c index 180b3024bec3..f607deb40f4d 100644 --- a/arch/arm/mach-at91/at91rm9200_time.c +++ b/arch/arm/mach-at91/at91rm9200_time.c @@ -93,7 +93,7 @@ static irqreturn_t at91rm9200_timer_interrupt(int irq, void *dev_id) static struct irqaction at91rm9200_timer_irq = { .name = "at91_tick", - .flags = IRQF_SHARED | IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL, + .flags = IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, .handler = at91rm9200_timer_interrupt, .irq = NR_IRQS_LEGACY + AT91_ID_SYS, }; diff --git a/arch/arm/mach-at91/at91sam926x_time.c b/arch/arm/mach-at91/at91sam926x_time.c index 3a4bc2e1a65e..bb392320a0dd 100644 --- a/arch/arm/mach-at91/at91sam926x_time.c +++ b/arch/arm/mach-at91/at91sam926x_time.c @@ -171,7 +171,7 @@ static irqreturn_t at91sam926x_pit_interrupt(int irq, void *dev_id) static struct irqaction at91sam926x_pit_irq = { .name = "at91_tick", - .flags = IRQF_SHARED | IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL, + .flags = IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, .handler = at91sam926x_pit_interrupt, .irq = NR_IRQS_LEGACY + AT91_ID_SYS, }; diff --git a/arch/arm/mach-at91/at91sam9g45_reset.S b/arch/arm/mach-at91/at91sam9g45_reset.S index 721a1a34dd1d..c40c1e2ef80f 100644 --- a/arch/arm/mach-at91/at91sam9g45_reset.S +++ b/arch/arm/mach-at91/at91sam9g45_reset.S @@ -16,11 +16,17 @@ #include "at91_rstc.h" .arm +/* + * at91_ramc_base is an array void* + * init at NULL if only one DDR controler is present in or DT + */ .globl at91sam9g45_restart at91sam9g45_restart: ldr r5, =at91_ramc_base @ preload constants ldr r0, [r5] + ldr r5, [r5, #4] @ ddr1 + cmp r5, #0 ldr r4, =at91_rstc_base ldr r1, [r4] @@ -30,6 +36,8 @@ at91sam9g45_restart: .balign 32 @ align to cache line + strne r2, [r5, #AT91_DDRSDRC_RTR] @ disable DDR1 access + strne r3, [r5, #AT91_DDRSDRC_LPR] @ power down DDR1 str r2, [r0, #AT91_DDRSDRC_RTR] @ disable DDR0 access str r3, [r0, #AT91_DDRSDRC_LPR] @ power down DDR0 str r4, [r1, #AT91_RSTC_CR] @ reset processor diff --git a/arch/arm/mach-at91/at91x40_time.c b/arch/arm/mach-at91/at91x40_time.c index 2919eba41ff4..c0e637adf65d 100644 --- a/arch/arm/mach-at91/at91x40_time.c +++ b/arch/arm/mach-at91/at91x40_time.c @@ -57,7 +57,7 @@ static irqreturn_t at91x40_timer_interrupt(int irq, void *dev_id) static struct irqaction at91x40_timer_irq = { .name = "at91_tick", - .flags = IRQF_DISABLED | IRQF_TIMER, + .flags = IRQF_TIMER, .handler = at91x40_timer_interrupt }; diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c index 92b7f770615a..4078ba93776b 100644 --- a/arch/arm/mach-davinci/board-dm365-evm.c +++ b/arch/arm/mach-davinci/board-dm365-evm.c @@ -176,7 +176,7 @@ static struct at24_platform_data eeprom_info = { .context = (void *)0x7f00, }; -static struct snd_platform_data dm365_evm_snd_data = { +static struct snd_platform_data dm365_evm_snd_data __maybe_unused = { .asp_chan_q = EVENTQ_3, }; diff --git a/arch/arm/mach-davinci/include/mach/serial.h b/arch/arm/mach-davinci/include/mach/serial.h index 52b8571b2e70..ce402cd21fa0 100644 --- a/arch/arm/mach-davinci/include/mach/serial.h +++ b/arch/arm/mach-davinci/include/mach/serial.h @@ -15,8 +15,6 @@ #include <mach/hardware.h> -#include <linux/platform_device.h> - #define DAVINCI_UART0_BASE (IO_PHYS + 0x20000) #define DAVINCI_UART1_BASE (IO_PHYS + 0x20400) #define DAVINCI_UART2_BASE (IO_PHYS + 0x20800) @@ -39,6 +37,8 @@ #define UART_DM646X_SCR_TX_WATERMARK 0x08 #ifndef __ASSEMBLY__ +#include <linux/platform_device.h> + extern int davinci_serial_init(struct platform_device *); #endif diff --git a/arch/arm/mach-integrator/pci_v3.h b/arch/arm/mach-integrator/pci_v3.h index 755fd29fed4a..06a9e2e7d007 100644 --- a/arch/arm/mach-integrator/pci_v3.h +++ b/arch/arm/mach-integrator/pci_v3.h @@ -1,2 +1,9 @@ /* Simple oneliner include to the PCIv3 early init */ +#ifdef CONFIG_PCI extern int pci_v3_early_init(void); +#else +static inline int pci_v3_early_init(void) +{ + return 0; +} +#endif diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index 4c24303ec481..58adf2fd9cfc 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -140,6 +140,7 @@ int __init coherency_init(void) coherency_base = of_iomap(np, 0); coherency_cpu_base = of_iomap(np, 1); set_cpu_coherent(cpu_logical_map(smp_processor_id()), 0); + of_node_put(np); } return 0; @@ -147,9 +148,14 @@ int __init coherency_init(void) static int __init coherency_late_init(void) { - if (of_find_matching_node(NULL, of_coherency_table)) + struct device_node *np; + + np = of_find_matching_node(NULL, of_coherency_table); + if (np) { bus_register_notifier(&platform_bus_type, &mvebu_hwcc_platform_nb); + of_node_put(np); + } return 0; } diff --git a/arch/arm/mach-mvebu/pmsu.c b/arch/arm/mach-mvebu/pmsu.c index 3cc4bef6401c..27fc4f049474 100644 --- a/arch/arm/mach-mvebu/pmsu.c +++ b/arch/arm/mach-mvebu/pmsu.c @@ -67,6 +67,7 @@ int __init armada_370_xp_pmsu_init(void) pr_info("Initializing Power Management Service Unit\n"); pmsu_mp_base = of_iomap(np, 0); pmsu_reset_base = of_iomap(np, 1); + of_node_put(np); } return 0; diff --git a/arch/arm/mach-mvebu/system-controller.c b/arch/arm/mach-mvebu/system-controller.c index f875124ff4f9..5175083cdb34 100644 --- a/arch/arm/mach-mvebu/system-controller.c +++ b/arch/arm/mach-mvebu/system-controller.c @@ -98,6 +98,7 @@ static int __init mvebu_system_controller_init(void) BUG_ON(!match); system_controller_base = of_iomap(np, 0); mvebu_sc = (struct mvebu_system_controller *)match->data; + of_node_put(np); } return 0; diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c index 39c78387ddec..87162e1b94a5 100644 --- a/arch/arm/mach-omap2/board-generic.c +++ b/arch/arm/mach-omap2/board-generic.c @@ -129,6 +129,24 @@ DT_MACHINE_START(OMAP3_DT, "Generic OMAP3 (Flattened Device Tree)") .restart = omap3xxx_restart, MACHINE_END +static const char *omap36xx_boards_compat[] __initdata = { + "ti,omap36xx", + NULL, +}; + +DT_MACHINE_START(OMAP36XX_DT, "Generic OMAP36xx (Flattened Device Tree)") + .reserve = omap_reserve, + .map_io = omap3_map_io, + .init_early = omap3630_init_early, + .init_irq = omap_intc_of_init, + .handle_irq = omap3_intc_handle_irq, + .init_machine = omap_generic_init, + .init_late = omap3_init_late, + .init_time = omap3_sync32k_timer_init, + .dt_compat = omap36xx_boards_compat, + .restart = omap3xxx_restart, +MACHINE_END + static const char *omap3_gp_boards_compat[] __initdata = { "ti,omap3-beagle", "timll,omap3-devkit8000", diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c index c3270c0f1fce..f6fe388af989 100644 --- a/arch/arm/mach-omap2/board-rx51-peripherals.c +++ b/arch/arm/mach-omap2/board-rx51-peripherals.c @@ -167,38 +167,47 @@ static struct lp55xx_led_config rx51_lp5523_led_config[] = { .name = "lp5523:kb1", .chan_nr = 0, .led_current = 50, + .max_current = 100, }, { .name = "lp5523:kb2", .chan_nr = 1, .led_current = 50, + .max_current = 100, }, { .name = "lp5523:kb3", .chan_nr = 2, .led_current = 50, + .max_current = 100, }, { .name = "lp5523:kb4", .chan_nr = 3, .led_current = 50, + .max_current = 100, }, { .name = "lp5523:b", .chan_nr = 4, .led_current = 50, + .max_current = 100, }, { .name = "lp5523:g", .chan_nr = 5, .led_current = 50, + .max_current = 100, }, { .name = "lp5523:r", .chan_nr = 6, .led_current = 50, + .max_current = 100, }, { .name = "lp5523:kb5", .chan_nr = 7, .led_current = 50, + .max_current = 100, }, { .name = "lp5523:kb6", .chan_nr = 8, .led_current = 50, + .max_current = 100, } }; diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c index 64b5a8346982..8b6876c98ce1 100644 --- a/arch/arm/mach-omap2/gpmc-onenand.c +++ b/arch/arm/mach-omap2/gpmc-onenand.c @@ -272,9 +272,19 @@ static int omap2_onenand_setup_async(void __iomem *onenand_base) struct gpmc_timings t; int ret; - if (gpmc_onenand_data->of_node) + if (gpmc_onenand_data->of_node) { gpmc_read_settings_dt(gpmc_onenand_data->of_node, &onenand_async); + if (onenand_async.sync_read || onenand_async.sync_write) { + if (onenand_async.sync_write) + gpmc_onenand_data->flags |= + ONENAND_SYNC_READWRITE; + else + gpmc_onenand_data->flags |= ONENAND_SYNC_READ; + onenand_async.sync_read = false; + onenand_async.sync_write = false; + } + } omap2_onenand_set_async_mode(onenand_base); diff --git a/arch/arm/mach-omap2/mux.h b/arch/arm/mach-omap2/mux.h index 5d2080ef7923..16f78a990d04 100644 --- a/arch/arm/mach-omap2/mux.h +++ b/arch/arm/mach-omap2/mux.h @@ -28,7 +28,7 @@ #define OMAP_PULL_UP (1 << 4) #define OMAP_ALTELECTRICALSEL (1 << 5) -/* 34xx specific mux bit defines */ +/* omap3/4/5 specific mux bit defines */ #define OMAP_INPUT_EN (1 << 8) #define OMAP_OFF_EN (1 << 9) #define OMAP_OFFOUT_EN (1 << 10) @@ -36,8 +36,6 @@ #define OMAP_OFF_PULL_EN (1 << 12) #define OMAP_OFF_PULL_UP (1 << 13) #define OMAP_WAKEUP_EN (1 << 14) - -/* 44xx specific mux bit defines */ #define OMAP_WAKEUP_EVENT (1 << 15) /* Active pin states */ diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index fa74a0625da1..ead48fa5715e 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -628,7 +628,7 @@ void __init omap4_local_timer_init(void) #endif /* CONFIG_HAVE_ARM_TWD */ #endif /* CONFIG_ARCH_OMAP4 */ -#ifdef CONFIG_SOC_OMAP5 +#if defined(CONFIG_SOC_OMAP5) || defined(CONFIG_SOC_DRA7XX) void __init omap5_realtime_timer_init(void) { omap4_sync32k_timer_init(); @@ -636,7 +636,7 @@ void __init omap5_realtime_timer_init(void) clocksource_of_init(); } -#endif /* CONFIG_SOC_OMAP5 */ +#endif /* CONFIG_SOC_OMAP5 || CONFIG_SOC_DRA7XX */ /** * omap_timer_init - build and register timer device with an diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c index 5bd1479d3deb..7f8f6076d360 100644 --- a/arch/arm/mach-shmobile/board-armadillo800eva.c +++ b/arch/arm/mach-shmobile/board-armadillo800eva.c @@ -1108,9 +1108,9 @@ static const struct pinctrl_map eva_pinctrl_map[] = { PIN_MAP_MUX_GROUP_DEFAULT("asoc-simple-card.1", "pfc-r8a7740", "fsib_mclk_in", "fsib"), /* GETHER */ - PIN_MAP_MUX_GROUP_DEFAULT("sh-eth", "pfc-r8a7740", + PIN_MAP_MUX_GROUP_DEFAULT("r8a7740-gether", "pfc-r8a7740", "gether_mii", "gether"), - PIN_MAP_MUX_GROUP_DEFAULT("sh-eth", "pfc-r8a7740", + PIN_MAP_MUX_GROUP_DEFAULT("r8a7740-gether", "pfc-r8a7740", "gether_int", "gether"), /* HDMI */ PIN_MAP_MUX_GROUP_DEFAULT("sh-mobile-hdmi", "pfc-r8a7740", diff --git a/arch/arm/mach-shmobile/board-lager.c b/arch/arm/mach-shmobile/board-lager.c index ffb6f0ac7606..5930af8d434f 100644 --- a/arch/arm/mach-shmobile/board-lager.c +++ b/arch/arm/mach-shmobile/board-lager.c @@ -29,6 +29,7 @@ #include <linux/pinctrl/machine.h> #include <linux/platform_data/gpio-rcar.h> #include <linux/platform_device.h> +#include <linux/phy.h> #include <linux/regulator/fixed.h> #include <linux/regulator/machine.h> #include <linux/sh_eth.h> @@ -155,6 +156,30 @@ static void __init lager_add_standard_devices(void) ðer_pdata, sizeof(ether_pdata)); } +/* + * Ether LEDs on the Lager board are named LINK and ACTIVE which corresponds + * to non-default 01 setting of the Micrel KSZ8041 PHY control register 1 bits + * 14-15. We have to set them back to 01 from the default 00 value each time + * the PHY is reset. It's also important because the PHY's LED0 signal is + * connected to SoC's ETH_LINK signal and in the PHY's default mode it will + * bounce on and off after each packet, which we apparently want to avoid. + */ +static int lager_ksz8041_fixup(struct phy_device *phydev) +{ + u16 phyctrl1 = phy_read(phydev, 0x1e); + + phyctrl1 &= ~0xc000; + phyctrl1 |= 0x4000; + return phy_write(phydev, 0x1e, phyctrl1); +} + +static void __init lager_init(void) +{ + lager_add_standard_devices(); + + phy_register_fixup_for_id("r8a7790-ether-ff:01", lager_ksz8041_fixup); +} + static const char *lager_boards_compat_dt[] __initdata = { "renesas,lager", NULL, @@ -163,6 +188,6 @@ static const char *lager_boards_compat_dt[] __initdata = { DT_MACHINE_START(LAGER_DT, "lager") .init_early = r8a7790_init_delay, .init_time = r8a7790_timer_init, - .init_machine = lager_add_standard_devices, + .init_machine = lager_init, .dt_compat = lager_boards_compat_dt, MACHINE_END diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c index 7aeb5d60e484..e6eb48192912 100644 --- a/arch/arm/mach-vexpress/tc2_pm.c +++ b/arch/arm/mach-vexpress/tc2_pm.c @@ -131,6 +131,16 @@ static void tc2_pm_down(u64 residency) } else BUG(); + /* + * If the CPU is committed to power down, make sure + * the power controller will be in charge of waking it + * up upon IRQ, ie IRQ lines are cut from GIC CPU IF + * to the CPU by disabling the GIC CPU IF to prevent wfi + * from completing execution behind power controller back + */ + if (!skip_wfi) + gic_cpu_if_down(); + if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) { arch_spin_unlock(&tc2_pm_lock); @@ -231,7 +241,6 @@ static void tc2_pm_suspend(u64 residency) cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); ve_spc_set_resume_addr(cluster, cpu, virt_to_phys(mcpm_entry_point)); - gic_cpu_if_down(); tc2_pm_down(residency); } diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index f5e1a8471714..1272ed202dde 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1232,7 +1232,8 @@ __iommu_create_mapping(struct device *dev, struct page **pages, size_t size) break; len = (j - i) << PAGE_SHIFT; - ret = iommu_map(mapping->domain, iova, phys, len, 0); + ret = iommu_map(mapping->domain, iova, phys, len, + IOMMU_READ|IOMMU_WRITE); if (ret < 0) goto fail; iova += len; @@ -1431,6 +1432,27 @@ static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt, GFP_KERNEL); } +static int __dma_direction_to_prot(enum dma_data_direction dir) +{ + int prot; + + switch (dir) { + case DMA_BIDIRECTIONAL: + prot = IOMMU_READ | IOMMU_WRITE; + break; + case DMA_TO_DEVICE: + prot = IOMMU_READ; + break; + case DMA_FROM_DEVICE: + prot = IOMMU_WRITE; + break; + default: + prot = 0; + } + + return prot; +} + /* * Map a part of the scatter-gather list into contiguous io address space */ @@ -1444,6 +1466,7 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, int ret = 0; unsigned int count; struct scatterlist *s; + int prot; size = PAGE_ALIGN(size); *handle = DMA_ERROR_CODE; @@ -1460,7 +1483,9 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); - ret = iommu_map(mapping->domain, iova, phys, len, 0); + prot = __dma_direction_to_prot(dir); + + ret = iommu_map(mapping->domain, iova, phys, len, prot); if (ret < 0) goto fail; count += len >> PAGE_SHIFT; @@ -1665,19 +1690,7 @@ static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *p if (dma_addr == DMA_ERROR_CODE) return dma_addr; - switch (dir) { - case DMA_BIDIRECTIONAL: - prot = IOMMU_READ | IOMMU_WRITE; - break; - case DMA_TO_DEVICE: - prot = IOMMU_READ; - break; - case DMA_FROM_DEVICE: - prot = IOMMU_WRITE; - break; - default: - prot = 0; - } + prot = __dma_direction_to_prot(dir); ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot); if (ret < 0) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index febaee7ca57b..18ec4c504abf 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -17,7 +17,6 @@ #include <linux/nodemask.h> #include <linux/initrd.h> #include <linux/of_fdt.h> -#include <linux/of_reserved_mem.h> #include <linux/highmem.h> #include <linux/gfp.h> #include <linux/memblock.h> @@ -379,8 +378,6 @@ void __init arm_memblock_init(struct meminfo *mi, if (mdesc->reserve) mdesc->reserve(); - early_init_dt_scan_reserved_mem(); - /* * reserve memory for DMA contigouos allocations, * must come from DMA area inside low memory diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index 1a6bfe954d49..835c559786bd 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -6,13 +6,6 @@ config FRAME_POINTER bool default y -config DEBUG_STACK_USAGE - bool "Enable stack utilization instrumentation" - depends on DEBUG_KERNEL - help - Enables the display of the minimum amount of free stack which each - task has ever had available in the sysrq-T output. - config EARLY_PRINTK bool "Early printk support" default y diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 5b3e83217b03..31c81e9b792e 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -42,7 +42,7 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_WIRELESS is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y -# CONFIG_BLK_DEV is not set +CONFIG_BLK_DEV=y CONFIG_SCSI=y # CONFIG_SCSI_PROC_FS is not set CONFIG_BLK_DEV_SD=y @@ -72,6 +72,7 @@ CONFIG_LOGO=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT3_FS_XATTR is not set CONFIG_FUSE_FS=y @@ -90,3 +91,5 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO=y # CONFIG_FTRACE is not set CONFIG_ATOMIC64_SELFTEST=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_BLK=y diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index db805092698c..dd8ecfc3f995 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -177,6 +177,11 @@ static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE; } +static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) +{ + return vcpu_sys_reg(vcpu, MPIDR_EL1); +} + static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0859a4ddd1e7..5d85a02d1231 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -36,11 +36,6 @@ #define KVM_VCPU_MAX_FEATURES 2 -/* We don't currently support large pages. */ -#define KVM_HPAGE_GFN_SHIFT(x) 0 -#define KVM_NR_PAGE_SIZES 1 -#define KVM_PAGES_PER_HPAGE(x) (1UL<<31) - struct kvm_vcpu; int kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); @@ -151,6 +146,7 @@ struct kvm_vcpu_stat { struct kvm_vcpu_init; int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init); +int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); struct kvm_one_reg; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index efe609c6a3c9..680f74e67497 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -91,6 +91,7 @@ int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); #define kvm_set_pte(ptep, pte) set_pte(ptep, pte) +#define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd) static inline bool kvm_is_write_fault(unsigned long esr) { @@ -116,13 +117,18 @@ static inline void kvm_set_s2pte_writable(pte_t *pte) pte_val(*pte) |= PTE_S2_RDWR; } +static inline void kvm_set_s2pmd_writable(pmd_t *pmd) +{ + pmd_val(*pmd) |= PMD_S2_RDWR; +} + struct kvm; -static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) +static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, + unsigned long size) { if (!icache_is_aliasing()) { /* PIPT */ - unsigned long hva = gfn_to_hva(kvm, gfn); - flush_icache_range(hva, hva + PAGE_SIZE); + flush_icache_range(hva, hva + size); } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */ /* any kind of VIPT cache */ __flush_icache_all(); diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index d57e66845c86..755f86143320 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -85,6 +85,8 @@ #define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */ #define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ +#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ + /* * Memory Attribute override for Stage-2 (MemAttr[3:0]) */ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index edb3d5c73a32..7ecc2b23882e 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -166,9 +166,10 @@ do { \ #define get_user(x, ptr) \ ({ \ + __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ - access_ok(VERIFY_READ, (ptr), sizeof(*(ptr))) ? \ - __get_user((x), (ptr)) : \ + access_ok(VERIFY_READ, __p, sizeof(*__p)) ? \ + __get_user((x), __p) : \ ((x) = 0, -EFAULT); \ }) @@ -227,9 +228,10 @@ do { \ #define put_user(x, ptr) \ ({ \ + __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ - access_ok(VERIFY_WRITE, (ptr), sizeof(*(ptr))) ? \ - __put_user((x), (ptr)) : \ + access_ok(VERIFY_WRITE, __p, sizeof(*__p)) ? \ + __put_user((x), __p) : \ -EFAULT; \ }) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 1f2e4d5a5c0f..bb785d23dbde 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -80,8 +80,10 @@ void fpsimd_thread_switch(struct task_struct *next) void fpsimd_flush_thread(void) { + preempt_disable(); memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); fpsimd_load_state(¤t->thread.fpsimd_state); + preempt_enable(); } #ifdef CONFIG_KERNEL_MODE_NEON diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 2c3ff67a8ecb..3f0731e53274 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -248,6 +248,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, return kvm_reset_vcpu(vcpu); } +int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) +{ + int target = kvm_target_cpu(); + + if (target < 0) + return -ENODEV; + + memset(init, 0, sizeof(*init)); + + /* + * For now, we don't return any features. + * In future, we might use features to return target + * specific features available for the preferred + * target type. + */ + init->target = (__u32)target; + + return 0; +} + int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { return -EINVAL; diff --git a/arch/arm64/mm/tlb.S b/arch/arm64/mm/tlb.S index 8ae80a18e8ec..19da91e0cd27 100644 --- a/arch/arm64/mm/tlb.S +++ b/arch/arm64/mm/tlb.S @@ -35,7 +35,7 @@ */ ENTRY(__cpu_flush_user_tlb_range) vma_vm_mm x3, x2 // get vma->vm_mm - mmid x3, x3 // get vm_mm->context.id + mmid w3, x3 // get vm_mm->context.id dsb sy lsr x0, x0, #12 // align address lsr x1, x1, #12 diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index d22af851f3f6..fd7980743890 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -1,5 +1,19 @@ generic-y += clkdev.h +generic-y += cputime.h +generic-y += delay.h +generic-y += device.h +generic-y += div64.h +generic-y += emergency-restart.h generic-y += exec.h -generic-y += trace_clock.h +generic-y += futex.h +generic-y += irq_regs.h generic-y += param.h +generic-y += local.h +generic-y += local64.h +generic-y += percpu.h +generic-y += scatterlist.h +generic-y += sections.h +generic-y += topology.h +generic-y += trace_clock.h +generic-y += xor.h diff --git a/arch/avr32/include/asm/cputime.h b/arch/avr32/include/asm/cputime.h deleted file mode 100644 index e87e0f81cbeb..000000000000 --- a/arch/avr32/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_CPUTIME_H -#define __ASM_AVR32_CPUTIME_H - -#include <asm-generic/cputime.h> - -#endif /* __ASM_AVR32_CPUTIME_H */ diff --git a/arch/avr32/include/asm/delay.h b/arch/avr32/include/asm/delay.h deleted file mode 100644 index 9670e127b7b2..000000000000 --- a/arch/avr32/include/asm/delay.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/delay.h> diff --git a/arch/avr32/include/asm/device.h b/arch/avr32/include/asm/device.h deleted file mode 100644 index d8f9872b0e2d..000000000000 --- a/arch/avr32/include/asm/device.h +++ /dev/null @@ -1,7 +0,0 @@ -/* - * Arch specific extensions to struct device - * - * This file is released under the GPLv2 - */ -#include <asm-generic/device.h> - diff --git a/arch/avr32/include/asm/div64.h b/arch/avr32/include/asm/div64.h deleted file mode 100644 index d7ddd4fdeca6..000000000000 --- a/arch/avr32/include/asm/div64.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_DIV64_H -#define __ASM_AVR32_DIV64_H - -#include <asm-generic/div64.h> - -#endif /* __ASM_AVR32_DIV64_H */ diff --git a/arch/avr32/include/asm/emergency-restart.h b/arch/avr32/include/asm/emergency-restart.h deleted file mode 100644 index 3e7e014776ba..000000000000 --- a/arch/avr32/include/asm/emergency-restart.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_EMERGENCY_RESTART_H -#define __ASM_AVR32_EMERGENCY_RESTART_H - -#include <asm-generic/emergency-restart.h> - -#endif /* __ASM_AVR32_EMERGENCY_RESTART_H */ diff --git a/arch/avr32/include/asm/futex.h b/arch/avr32/include/asm/futex.h deleted file mode 100644 index 10419f14a68a..000000000000 --- a/arch/avr32/include/asm/futex.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_FUTEX_H -#define __ASM_AVR32_FUTEX_H - -#include <asm-generic/futex.h> - -#endif /* __ASM_AVR32_FUTEX_H */ diff --git a/arch/avr32/include/asm/irq_regs.h b/arch/avr32/include/asm/irq_regs.h deleted file mode 100644 index 3dd9c0b70270..000000000000 --- a/arch/avr32/include/asm/irq_regs.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/irq_regs.h> diff --git a/arch/avr32/include/asm/local.h b/arch/avr32/include/asm/local.h deleted file mode 100644 index 1c1619694da3..000000000000 --- a/arch/avr32/include/asm/local.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_LOCAL_H -#define __ASM_AVR32_LOCAL_H - -#include <asm-generic/local.h> - -#endif /* __ASM_AVR32_LOCAL_H */ diff --git a/arch/avr32/include/asm/local64.h b/arch/avr32/include/asm/local64.h deleted file mode 100644 index 36c93b5cc239..000000000000 --- a/arch/avr32/include/asm/local64.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/local64.h> diff --git a/arch/avr32/include/asm/percpu.h b/arch/avr32/include/asm/percpu.h deleted file mode 100644 index 69227b4cd0d4..000000000000 --- a/arch/avr32/include/asm/percpu.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_PERCPU_H -#define __ASM_AVR32_PERCPU_H - -#include <asm-generic/percpu.h> - -#endif /* __ASM_AVR32_PERCPU_H */ diff --git a/arch/avr32/include/asm/scatterlist.h b/arch/avr32/include/asm/scatterlist.h deleted file mode 100644 index a5902d9834e8..000000000000 --- a/arch/avr32/include/asm/scatterlist.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_SCATTERLIST_H -#define __ASM_AVR32_SCATTERLIST_H - -#include <asm-generic/scatterlist.h> - -#endif /* __ASM_AVR32_SCATTERLIST_H */ diff --git a/arch/avr32/include/asm/sections.h b/arch/avr32/include/asm/sections.h deleted file mode 100644 index aa14252e4181..000000000000 --- a/arch/avr32/include/asm/sections.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_SECTIONS_H -#define __ASM_AVR32_SECTIONS_H - -#include <asm-generic/sections.h> - -#endif /* __ASM_AVR32_SECTIONS_H */ diff --git a/arch/avr32/include/asm/topology.h b/arch/avr32/include/asm/topology.h deleted file mode 100644 index 5b766cbb4806..000000000000 --- a/arch/avr32/include/asm/topology.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_TOPOLOGY_H -#define __ASM_AVR32_TOPOLOGY_H - -#include <asm-generic/topology.h> - -#endif /* __ASM_AVR32_TOPOLOGY_H */ diff --git a/arch/avr32/include/asm/xor.h b/arch/avr32/include/asm/xor.h deleted file mode 100644 index 99c87aa0af4f..000000000000 --- a/arch/avr32/include/asm/xor.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_XOR_H -#define _ASM_XOR_H - -#include <asm-generic/xor.h> - -#endif diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index c2731003edef..42a53e740a7e 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -289,7 +289,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, memset(childregs, 0, sizeof(struct pt_regs)); p->thread.cpu_context.r0 = arg; p->thread.cpu_context.r1 = usp; /* fn */ - p->thread.cpu_context.r2 = syscall_return; + p->thread.cpu_context.r2 = (unsigned long)syscall_return; p->thread.cpu_context.pc = (unsigned long)ret_from_kernel_thread; childregs->sr = MODE_SUPERVISOR; } else { diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c index 869a1c6ffeee..12f828ad5058 100644 --- a/arch/avr32/kernel/time.c +++ b/arch/avr32/kernel/time.c @@ -98,7 +98,14 @@ static void comparator_mode(enum clock_event_mode mode, case CLOCK_EVT_MODE_SHUTDOWN: sysreg_write(COMPARE, 0); pr_debug("%s: stop\n", evdev->name); - cpu_idle_poll_ctrl(false); + if (evdev->mode == CLOCK_EVT_MODE_ONESHOT || + evdev->mode == CLOCK_EVT_MODE_RESUME) { + /* + * Only disable idle poll if we have forced that + * in a previous call. + */ + cpu_idle_poll_ctrl(false); + } break; default: BUG(); diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 989dd3fe8de1..db95f570705f 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -234,10 +234,6 @@ struct kvm_vm_data { #define KVM_REQ_PTC_G 32 #define KVM_REQ_RESUME 33 -#define KVM_HPAGE_GFN_SHIFT(x) 0 -#define KVM_NR_PAGE_SIZES 1 -#define KVM_PAGES_PER_HPAGE(x) 1 - struct kvm; struct kvm_vcpu; @@ -480,7 +476,7 @@ struct kvm_arch { struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; - int iommu_flags; + bool iommu_noncoherent; unsigned long irq_sources_bitmap; unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index bdfd8789b376..985bf80c622e 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1550,12 +1550,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } diff --git a/arch/mips/alchemy/board-mtx1.c b/arch/mips/alchemy/board-mtx1.c index 4a9baa9f6330..9969dbab19e3 100644 --- a/arch/mips/alchemy/board-mtx1.c +++ b/arch/mips/alchemy/board-mtx1.c @@ -276,7 +276,7 @@ static struct platform_device mtx1_pci_host = { .resource = alchemy_pci_host_res, }; -static struct __initdata platform_device * mtx1_devs[] = { +static struct platform_device *mtx1_devs[] __initdata = { &mtx1_pci_host, &mtx1_gpio_leds, &mtx1_wdt, diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index 51680d15ca8e..d445d060e346 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -187,7 +187,7 @@ /* * MIPS32, MIPS64, VR5500, IDT32332, IDT32334 and maybe a few other - * pre-MIPS32/MIPS53 processors have CLO, CLZ. The IDT RC64574 is 64-bit and + * pre-MIPS32/MIPS64 processors have CLO, CLZ. The IDT RC64574 is 64-bit and * has CLO and CLZ but not DCLO nor DCLZ. For 64-bit kernels * cpu_has_clo_clz also indicates the availability of DCLO and DCLZ. */ diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index 4d6d77ed9b9d..e194f957ca8c 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -22,7 +22,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:\tnop\n\t" + asm_volatile_goto("1:\tnop\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 4d6fa0bf1305..32966969f2f9 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -27,13 +27,6 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 -/* Don't support huge pages */ -#define KVM_HPAGE_GFN_SHIFT(x) 0 - -/* We don't currently support large pages. */ -#define KVM_NR_PAGE_SIZES 1 -#define KVM_PAGES_PER_HPAGE(x) 1 - /* Special address that contains the comm page, used for reducing # of traps */ diff --git a/arch/mips/kernel/octeon_switch.S b/arch/mips/kernel/octeon_switch.S index 4204d76af854..029e002a4ea0 100644 --- a/arch/mips/kernel/octeon_switch.S +++ b/arch/mips/kernel/octeon_switch.S @@ -73,7 +73,7 @@ 3: #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP) - PTR_L t8, __stack_chk_guard + PTR_LA t8, __stack_chk_guard LONG_L t9, TASK_STACK_CANARY(a1) LONG_S t9, 0(t8) #endif diff --git a/arch/mips/kernel/r2300_switch.S b/arch/mips/kernel/r2300_switch.S index 38af83f84c4a..20b7b040e76f 100644 --- a/arch/mips/kernel/r2300_switch.S +++ b/arch/mips/kernel/r2300_switch.S @@ -67,7 +67,7 @@ LEAF(resume) 1: #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP) - PTR_L t8, __stack_chk_guard + PTR_LA t8, __stack_chk_guard LONG_L t9, TASK_STACK_CANARY(a1) LONG_S t9, 0(t8) #endif diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S index 921238a6bd26..078de5eaca8f 100644 --- a/arch/mips/kernel/r4k_switch.S +++ b/arch/mips/kernel/r4k_switch.S @@ -69,7 +69,7 @@ 1: #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP) - PTR_L t8, __stack_chk_guard + PTR_LA t8, __stack_chk_guard LONG_L t9, TASK_STACK_CANARY(a1) LONG_S t9, 0(t8) #endif diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c index a7b044536de4..73b34827826c 100644 --- a/arch/mips/kvm/kvm_mips.c +++ b/arch/mips/kvm/kvm_mips.c @@ -198,12 +198,13 @@ kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) return -ENOIOCTLCMD; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 627883bc6d5f..bc6f96fcb529 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -609,6 +609,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) r4k_blast_scache(); else blast_scache_range(addr, addr + size); + preempt_enable(); __sync(); return; } @@ -650,6 +651,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) */ blast_inv_scache_range(addr, addr + size); } + preempt_enable(); __sync(); return; } diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index f25a7e9f8cbc..5f8b95512580 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -308,12 +308,10 @@ static void mips_dma_sync_sg_for_cpu(struct device *dev, { int i; - /* Make sure that gcc doesn't leave the empty loop body. */ - for (i = 0; i < nelems; i++, sg++) { - if (cpu_needs_post_dma_flush(dev)) + if (cpu_needs_post_dma_flush(dev)) + for (i = 0; i < nelems; i++, sg++) __dma_sync(sg_page(sg), sg->offset, sg->length, direction); - } } static void mips_dma_sync_sg_for_device(struct device *dev, @@ -321,12 +319,10 @@ static void mips_dma_sync_sg_for_device(struct device *dev, { int i; - /* Make sure that gcc doesn't leave the empty loop body. */ - for (i = 0; i < nelems; i++, sg++) { - if (!plat_device_is_coherent(dev)) + if (!plat_device_is_coherent(dev)) + for (i = 0; i < nelems; i++, sg++) __dma_sync(sg_page(sg), sg->offset, sg->length, direction); - } } int mips_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) diff --git a/arch/openrisc/include/asm/prom.h b/arch/openrisc/include/asm/prom.h index eb59bfe23e85..93c9980e1b6b 100644 --- a/arch/openrisc/include/asm/prom.h +++ b/arch/openrisc/include/asm/prom.h @@ -14,53 +14,9 @@ * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. */ - -#include <linux/of.h> /* linux/of.h gets to determine #include ordering */ - #ifndef _ASM_OPENRISC_PROM_H #define _ASM_OPENRISC_PROM_H -#ifdef __KERNEL__ -#ifndef __ASSEMBLY__ -#include <linux/types.h> -#include <asm/irq.h> -#include <linux/irqdomain.h> -#include <linux/atomic.h> -#include <linux/of_irq.h> -#include <linux/of_fdt.h> -#include <linux/of_address.h> -#include <linux/proc_fs.h> -#include <linux/platform_device.h> #define HAVE_ARCH_DEVTREE_FIXUPS -/* Other Prototypes */ -extern int early_uartlite_console(void); - -/* Parse the ibm,dma-window property of an OF node into the busno, phys and - * size parameters. - */ -void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop, - unsigned long *busno, unsigned long *phys, unsigned long *size); - -extern void kdump_move_device_tree(void); - -/* Get the MAC address */ -extern const void *of_get_mac_address(struct device_node *np); - -/** - * of_irq_map_pci - Resolve the interrupt for a PCI device - * @pdev: the device whose interrupt is to be resolved - * @out_irq: structure of_irq filled by this function - * - * This function resolves the PCI interrupt for a given PCI device. If a - * device-node exists for a given pci_dev, it will use normal OF tree - * walking. If not, it will implement standard swizzling and walk up the - * PCI tree until an device-node is found, at which point it will finish - * resolving using the OF tree walking. - */ -struct pci_dev; -extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq); - -#endif /* __ASSEMBLY__ */ -#endif /* __KERNEL__ */ #endif /* _ASM_OPENRISC_PROM_H */ diff --git a/arch/parisc/include/asm/traps.h b/arch/parisc/include/asm/traps.h index 1945f995f2df..4736020ba5ea 100644 --- a/arch/parisc/include/asm/traps.h +++ b/arch/parisc/include/asm/traps.h @@ -6,7 +6,7 @@ struct pt_regs; /* traps.c */ void parisc_terminate(char *msg, struct pt_regs *regs, - int code, unsigned long offset); + int code, unsigned long offset) __noreturn __cold; /* mm/fault.c */ void do_page_fault(struct pt_regs *regs, unsigned long code, diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index c035673209f7..b521c0adf4ec 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -602,6 +602,7 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); } } +EXPORT_SYMBOL_GPL(flush_cache_page); #ifdef CONFIG_PARISC_TMPALIAS diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 8a252f2d6c08..2b96602e812f 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -72,7 +72,6 @@ enum ipi_message_type { IPI_NOP=0, IPI_RESCHEDULE=1, IPI_CALL_FUNC, - IPI_CALL_FUNC_SINGLE, IPI_CPU_START, IPI_CPU_STOP, IPI_CPU_TEST @@ -164,11 +163,6 @@ ipi_interrupt(int irq, void *dev_id) generic_smp_call_function_interrupt(); break; - case IPI_CALL_FUNC_SINGLE: - smp_debug(100, KERN_DEBUG "CPU%d IPI_CALL_FUNC_SINGLE\n", this_cpu); - generic_smp_call_function_single_interrupt(); - break; - case IPI_CPU_START: smp_debug(100, KERN_DEBUG "CPU%d IPI_CPU_START\n", this_cpu); break; @@ -260,7 +254,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) void arch_send_call_function_single_ipi(int cpu) { - send_IPI_single(cpu, IPI_CALL_FUNC_SINGLE); + send_IPI_single(cpu, IPI_CALL_FUNC); } /* diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 04e47c6a4562..1cd1d0c83b6d 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -291,11 +291,6 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err) do_exit(SIGSEGV); } -int syscall_ipi(int (*syscall) (struct pt_regs *), struct pt_regs *regs) -{ - return syscall(regs); -} - /* gdb uses break 4,8 */ #define GDB_BREAK_INSN 0x10004 static void handle_gdb_break(struct pt_regs *regs, int wot) @@ -805,14 +800,14 @@ void notrace handle_interruption(int code, struct pt_regs *regs) else { /* - * The kernel should never fault on its own address space. + * The kernel should never fault on its own address space, + * unless pagefault_disable() was called before. */ - if (fault_space == 0) + if (fault_space == 0 && !in_atomic()) { pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC); parisc_terminate("Kernel Fault", regs, code, fault_address); - } } diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c index ac4370b1ca40..b5507ec06b84 100644 --- a/arch/parisc/lib/memcpy.c +++ b/arch/parisc/lib/memcpy.c @@ -56,7 +56,7 @@ #ifdef __KERNEL__ #include <linux/module.h> #include <linux/compiler.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #define s_space "%%sr1" #define d_space "%%sr2" #else @@ -524,4 +524,17 @@ EXPORT_SYMBOL(copy_to_user); EXPORT_SYMBOL(copy_from_user); EXPORT_SYMBOL(copy_in_user); EXPORT_SYMBOL(memcpy); + +long probe_kernel_read(void *dst, const void *src, size_t size) +{ + unsigned long addr = (unsigned long)src; + + if (size < 0 || addr < PAGE_SIZE) + return -EFAULT; + + /* check for I/O space F_EXTEND(0xfff00000) access as well? */ + + return __probe_kernel_read(dst, src, size); +} + #endif diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index d10d27a720c0..0293588d5b8c 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -171,17 +171,25 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, unsigned long address) { struct vm_area_struct *vma, *prev_vma; - struct task_struct *tsk = current; - struct mm_struct *mm = tsk->mm; + struct task_struct *tsk; + struct mm_struct *mm; unsigned long acc_type; int fault; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + unsigned int flags; - if (in_atomic() || !mm) + if (in_atomic()) goto no_context; + tsk = current; + mm = tsk->mm; + if (!mm) + goto no_context; + + flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; if (user_mode(regs)) flags |= FAULT_FLAG_USER; + + acc_type = parisc_acctyp(code, regs->iir); if (acc_type & VM_WRITE) flags |= FAULT_FLAG_WRITE; retry: @@ -196,8 +204,6 @@ retry: good_area: - acc_type = parisc_acctyp(code,regs->iir); - if ((vma->vm_flags & acc_type) != acc_type) goto bad_area; diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 6a15c968d214..15ca2255f438 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -74,7 +74,7 @@ src-wlib-$(CONFIG_8xx) += mpc8xx.c planetcore.c src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c src-wlib-$(CONFIG_EMBEDDED6xx) += mv64x60.c mv64x60_i2c.c ugecon.c -src-plat-y := of.c +src-plat-y := of.c epapr.c src-plat-$(CONFIG_40x) += fixed-head.S ep405.c cuboot-hotfoot.c \ treeboot-walnut.c cuboot-acadia.c \ cuboot-kilauea.c simpleboot.c \ @@ -97,7 +97,7 @@ src-plat-$(CONFIG_EMBEDDED6xx) += cuboot-pq2.c cuboot-mpc7448hpc2.c \ prpmc2800.c src-plat-$(CONFIG_AMIGAONE) += cuboot-amigaone.c src-plat-$(CONFIG_PPC_PS3) += ps3-head.S ps3-hvcall.S ps3.c -src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c +src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c epapr-wrapper.c src-wlib := $(sort $(src-wlib-y)) src-plat := $(sort $(src-plat-y)) diff --git a/arch/powerpc/boot/epapr-wrapper.c b/arch/powerpc/boot/epapr-wrapper.c new file mode 100644 index 000000000000..c10191006673 --- /dev/null +++ b/arch/powerpc/boot/epapr-wrapper.c @@ -0,0 +1,9 @@ +extern void epapr_platform_init(unsigned long r3, unsigned long r4, + unsigned long r5, unsigned long r6, + unsigned long r7); + +void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) +{ + epapr_platform_init(r3, r4, r5, r6, r7); +} diff --git a/arch/powerpc/boot/epapr.c b/arch/powerpc/boot/epapr.c index 06c1961bd124..02e91aa2194a 100644 --- a/arch/powerpc/boot/epapr.c +++ b/arch/powerpc/boot/epapr.c @@ -48,8 +48,8 @@ static void platform_fixups(void) fdt_addr, fdt_totalsize((void *)fdt_addr), ima_size); } -void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, - unsigned long r6, unsigned long r7) +void epapr_platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) { epapr_magic = r6; ima_size = r7; diff --git a/arch/powerpc/boot/of.c b/arch/powerpc/boot/of.c index 61d9899aa0d0..62e2f43ec1df 100644 --- a/arch/powerpc/boot/of.c +++ b/arch/powerpc/boot/of.c @@ -26,6 +26,9 @@ static unsigned long claim_base; +void epapr_platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7); + static void *of_try_claim(unsigned long size) { unsigned long addr = 0; @@ -61,7 +64,7 @@ static void of_image_hdr(const void *hdr) } } -void platform_init(unsigned long a1, unsigned long a2, void *promptr) +static void of_platform_init(unsigned long a1, unsigned long a2, void *promptr) { platform_ops.image_hdr = of_image_hdr; platform_ops.malloc = of_try_claim; @@ -81,3 +84,14 @@ void platform_init(unsigned long a1, unsigned long a2, void *promptr) loader_info.initrd_size = a2; } } + +void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) +{ + /* Detect OF vs. ePAPR boot */ + if (r5) + of_platform_init(r3, r4, (void *)r5); + else + epapr_platform_init(r3, r4, r5, r6, r7); +} + diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 6761c746048d..cd7af841ba05 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -148,18 +148,18 @@ make_space=y case "$platform" in pseries) - platformo=$object/of.o + platformo="$object/of.o $object/epapr.o" link_address='0x4000000' ;; maple) - platformo=$object/of.o + platformo="$object/of.o $object/epapr.o" link_address='0x400000' ;; pmac|chrp) - platformo=$object/of.o + platformo="$object/of.o $object/epapr.o" ;; coff) - platformo="$object/crt0.o $object/of.o" + platformo="$object/crt0.o $object/of.o $object/epapr.o" lds=$object/zImage.coff.lds link_address='0x500000' pie= @@ -253,6 +253,7 @@ treeboot-iss4xx-mpic) platformo="$object/treeboot-iss4xx.o" ;; epapr) + platformo="$object/epapr.o $object/epapr-wrapper.o" link_address='0x20000000' pie=-pie ;; diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h index 9b198d1b3b2b..856f8deb557a 100644 --- a/arch/powerpc/include/asm/disassemble.h +++ b/arch/powerpc/include/asm/disassemble.h @@ -77,4 +77,8 @@ static inline unsigned int get_d(u32 inst) return inst & 0xffff; } +static inline unsigned int get_oc(u32 inst) +{ + return (inst >> 11) & 0x7fff; +} #endif /* __ASM_PPC_DISASSEMBLE_H__ */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index cca12f084842..894662a5d4d5 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -198,12 +198,27 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) cmpwi r10,0; \ bne do_kvm_##n +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +/* + * If hv is possible, interrupts come into to the hv version + * of the kvmppc_interrupt code, which then jumps to the PR handler, + * kvmppc_interrupt_pr, if the guest is a PR guest. + */ +#define kvmppc_interrupt kvmppc_interrupt_hv +#else +#define kvmppc_interrupt kvmppc_interrupt_pr +#endif + #define __KVM_HANDLER(area, h, n) \ do_kvm_##n: \ BEGIN_FTR_SECTION_NESTED(947) \ ld r10,area+EX_CFAR(r13); \ std r10,HSTATE_CFAR(r13); \ END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947); \ + BEGIN_FTR_SECTION_NESTED(948) \ + ld r10,area+EX_PPR(r13); \ + std r10,HSTATE_PPR(r13); \ + END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \ ld r10,area+EX_R10(r13); \ stw r9,HSTATE_SCRATCH1(r13); \ ld r9,area+EX_R9(r13); \ @@ -217,6 +232,10 @@ do_kvm_##n: \ ld r10,area+EX_R10(r13); \ beq 89f; \ stw r9,HSTATE_SCRATCH1(r13); \ + BEGIN_FTR_SECTION_NESTED(948) \ + ld r9,area+EX_PPR(r13); \ + std r9,HSTATE_PPR(r13); \ + END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \ ld r9,area+EX_R9(r13); \ std r12,HSTATE_SCRATCH0(r13); \ li r12,n; \ @@ -236,7 +255,7 @@ do_kvm_##n: \ #define KVM_HANDLER_SKIP(area, h, n) #endif -#ifdef CONFIG_KVM_BOOK3S_PR +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE #define KVMTEST_PR(n) __KVMTEST(n) #define KVM_HANDLER_PR(area, h, n) __KVM_HANDLER(area, h, n) #define KVM_HANDLER_PR_SKIP(area, h, n) __KVM_HANDLER_SKIP(area, h, n) diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 0e40843a1c6e..41f13cec8a8f 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -69,9 +69,9 @@ extern struct thread_info *softirq_ctx[NR_CPUS]; extern void irq_ctx_init(void); extern void call_do_softirq(struct thread_info *tp); -extern int call_handle_irq(int irq, void *p1, - struct thread_info *tp, void *func); +extern void call_do_irq(struct pt_regs *regs, struct thread_info *tp); extern void do_IRQ(struct pt_regs *regs); +extern void __do_irq(struct pt_regs *regs); int irq_choose_cpu(const struct cpumask *mask); diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index ae098c438f00..f016bb699b5f 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -19,7 +19,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:\n\t" + asm_volatile_goto("1:\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 851bac7afa4b..1bd92fd43cfb 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -123,6 +123,8 @@ #define BOOK3S_HFLAG_SLB 0x2 #define BOOK3S_HFLAG_PAIRED_SINGLE 0x4 #define BOOK3S_HFLAG_NATIVE_PS 0x8 +#define BOOK3S_HFLAG_MULTI_PGSIZE 0x10 +#define BOOK3S_HFLAG_NEW_TLBIE 0x20 #define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ #define RESUME_FLAG_HOST (1<<1) /* Resume host? */ @@ -136,6 +138,8 @@ #define KVM_GUEST_MODE_NONE 0 #define KVM_GUEST_MODE_GUEST 1 #define KVM_GUEST_MODE_SKIP 2 +#define KVM_GUEST_MODE_GUEST_HV 3 +#define KVM_GUEST_MODE_HOST_HV 4 #define KVM_INST_FETCH_FAILED -1 diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index fa19e2f1a874..4a594b76674d 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -58,16 +58,18 @@ struct hpte_cache { struct hlist_node list_pte_long; struct hlist_node list_vpte; struct hlist_node list_vpte_long; +#ifdef CONFIG_PPC_BOOK3S_64 + struct hlist_node list_vpte_64k; +#endif struct rcu_head rcu_head; u64 host_vpn; u64 pfn; ulong slot; struct kvmppc_pte pte; + int pagesize; }; struct kvmppc_vcpu_book3s { - struct kvm_vcpu vcpu; - struct kvmppc_book3s_shadow_vcpu *shadow_vcpu; struct kvmppc_sid_map sid_map[SID_MAP_NUM]; struct { u64 esid; @@ -99,6 +101,9 @@ struct kvmppc_vcpu_book3s { struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG]; struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE]; struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG]; +#ifdef CONFIG_PPC_BOOK3S_64 + struct hlist_head hpte_hash_vpte_64k[HPTEG_HASH_NUM_VPTE_64K]; +#endif int hpte_cache_count; spinlock_t mmu_lock; }; @@ -107,8 +112,9 @@ struct kvmppc_vcpu_book3s { #define CONTEXT_GUEST 1 #define CONTEXT_GUEST_END 2 -#define VSID_REAL 0x0fffffffffc00000ULL -#define VSID_BAT 0x0fffffffffb00000ULL +#define VSID_REAL 0x07ffffffffc00000ULL +#define VSID_BAT 0x07ffffffffb00000ULL +#define VSID_64K 0x0800000000000000ULL #define VSID_1T 0x1000000000000000ULL #define VSID_REAL_DR 0x2000000000000000ULL #define VSID_REAL_IR 0x4000000000000000ULL @@ -118,11 +124,12 @@ extern void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong ea, ulong ea_mask) extern void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 vp, u64 vp_mask); extern void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end); extern void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 new_msr); -extern void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr); extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu); extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu); extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu); -extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); +extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte, + bool iswrite); +extern void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); extern void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong eaddr, ulong seg_size); extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); @@ -134,6 +141,7 @@ extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte); extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu); +extern void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte); extern void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu); extern int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu); extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte); @@ -151,7 +159,8 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, u32 val); extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); -extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); +extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, + bool *writable); extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, unsigned long *rmap, long pte_index, int realmode); extern void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, @@ -172,6 +181,8 @@ extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, unsigned long *hpret); extern long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long *map); +extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, + unsigned long mask); extern void kvmppc_entry_trampoline(void); extern void kvmppc_hv_entry_trampoline(void); @@ -184,11 +195,9 @@ extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd); static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) { - return container_of(vcpu, struct kvmppc_vcpu_book3s, vcpu); + return vcpu->arch.book3s; } -extern void kvm_return_point(void); - /* Also add subarch specific defines */ #ifdef CONFIG_KVM_BOOK3S_32_HANDLER @@ -198,203 +207,6 @@ extern void kvm_return_point(void); #include <asm/kvm_book3s_64.h> #endif -#ifdef CONFIG_KVM_BOOK3S_PR - -static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) -{ - return to_book3s(vcpu)->hior; -} - -static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu, - unsigned long pending_now, unsigned long old_pending) -{ - if (pending_now) - vcpu->arch.shared->int_pending = 1; - else if (old_pending) - vcpu->arch.shared->int_pending = 0; -} - -static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) -{ - if ( num < 14 ) { - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); - svcpu->gpr[num] = val; - svcpu_put(svcpu); - to_book3s(vcpu)->shadow_vcpu->gpr[num] = val; - } else - vcpu->arch.gpr[num] = val; -} - -static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) -{ - if ( num < 14 ) { - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); - ulong r = svcpu->gpr[num]; - svcpu_put(svcpu); - return r; - } else - return vcpu->arch.gpr[num]; -} - -static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) -{ - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); - svcpu->cr = val; - svcpu_put(svcpu); - to_book3s(vcpu)->shadow_vcpu->cr = val; -} - -static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) -{ - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); - u32 r; - r = svcpu->cr; - svcpu_put(svcpu); - return r; -} - -static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) -{ - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); - svcpu->xer = val; - to_book3s(vcpu)->shadow_vcpu->xer = val; - svcpu_put(svcpu); -} - -static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) -{ - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); - u32 r; - r = svcpu->xer; - svcpu_put(svcpu); - return r; -} - -static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val) -{ - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); - svcpu->ctr = val; - svcpu_put(svcpu); -} - -static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu) -{ - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); - ulong r; - r = svcpu->ctr; - svcpu_put(svcpu); - return r; -} - -static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val) -{ - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); - svcpu->lr = val; - svcpu_put(svcpu); -} - -static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu) -{ - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); - ulong r; - r = svcpu->lr; - svcpu_put(svcpu); - return r; -} - -static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val) -{ - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); - svcpu->pc = val; - svcpu_put(svcpu); -} - -static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) -{ - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); - ulong r; - r = svcpu->pc; - svcpu_put(svcpu); - return r; -} - -static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) -{ - ulong pc = kvmppc_get_pc(vcpu); - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); - u32 r; - - /* Load the instruction manually if it failed to do so in the - * exit path */ - if (svcpu->last_inst == KVM_INST_FETCH_FAILED) - kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false); - - r = svcpu->last_inst; - svcpu_put(svcpu); - return r; -} - -/* - * Like kvmppc_get_last_inst(), but for fetching a sc instruction. - * Because the sc instruction sets SRR0 to point to the following - * instruction, we have to fetch from pc - 4. - */ -static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu) -{ - ulong pc = kvmppc_get_pc(vcpu) - 4; - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); - u32 r; - - /* Load the instruction manually if it failed to do so in the - * exit path */ - if (svcpu->last_inst == KVM_INST_FETCH_FAILED) - kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false); - - r = svcpu->last_inst; - svcpu_put(svcpu); - return r; -} - -static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) -{ - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); - ulong r; - r = svcpu->fault_dar; - svcpu_put(svcpu); - return r; -} - -static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) -{ - ulong crit_raw = vcpu->arch.shared->critical; - ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); - bool crit; - - /* Truncate crit indicators in 32 bit mode */ - if (!(vcpu->arch.shared->msr & MSR_SF)) { - crit_raw &= 0xffffffff; - crit_r1 &= 0xffffffff; - } - - /* Critical section when crit == r1 */ - crit = (crit_raw == crit_r1); - /* ... and we're in supervisor mode */ - crit = crit && !(vcpu->arch.shared->msr & MSR_PR); - - return crit; -} -#else /* CONFIG_KVM_BOOK3S_PR */ - -static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) -{ - return 0; -} - -static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu, - unsigned long pending_now, unsigned long old_pending) -{ -} - static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) { vcpu->arch.gpr[num] = val; @@ -489,12 +301,6 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) return vcpu->arch.fault_dar; } -static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) -{ - return false; -} -#endif - /* Magic register values loaded into r3 and r4 before the 'sc' assembly * instruction for the OSI hypercalls */ #define OSI_SC_MAGIC_R3 0x113724FA diff --git a/arch/powerpc/include/asm/kvm_book3s_32.h b/arch/powerpc/include/asm/kvm_book3s_32.h index ce0ef6ce8f86..c720e0b3238d 100644 --- a/arch/powerpc/include/asm/kvm_book3s_32.h +++ b/arch/powerpc/include/asm/kvm_book3s_32.h @@ -22,7 +22,7 @@ static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu) { - return to_book3s(vcpu)->shadow_vcpu; + return vcpu->arch.shadow_vcpu; } static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 86d638a3b359..bf0fa8b0a883 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -20,7 +20,7 @@ #ifndef __ASM_KVM_BOOK3S_64_H__ #define __ASM_KVM_BOOK3S_64_H__ -#ifdef CONFIG_KVM_BOOK3S_PR +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu) { preempt_disable(); @@ -35,7 +35,7 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) #define SPAPR_TCE_SHIFT 12 -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ extern unsigned long kvm_rma_pages; #endif @@ -278,7 +278,7 @@ static inline int is_vrma_hpte(unsigned long hpte_v) (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16))); } -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* * Note modification of an HPTE; set the HPTE modified bit * if anyone is interested. @@ -289,6 +289,6 @@ static inline void note_hpte_modification(struct kvm *kvm, if (atomic_read(&kvm->arch.hpte_mod_interest)) rev->guest_rpte |= HPTE_GR_MODIFIED; } -#endif /* CONFIG_KVM_BOOK3S_64_HV */ +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 9039d3c97eec..0bd9348a4db9 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -83,7 +83,7 @@ struct kvmppc_host_state { u8 restore_hid5; u8 napping; -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE u8 hwthread_req; u8 hwthread_state; u8 host_ipi; @@ -101,6 +101,7 @@ struct kvmppc_host_state { #endif #ifdef CONFIG_PPC_BOOK3S_64 u64 cfar; + u64 ppr; #endif }; @@ -108,14 +109,14 @@ struct kvmppc_book3s_shadow_vcpu { ulong gpr[14]; u32 cr; u32 xer; - - u32 fault_dsisr; - u32 last_inst; ulong ctr; ulong lr; ulong pc; + ulong shadow_srr1; ulong fault_dar; + u32 fault_dsisr; + u32 last_inst; #ifdef CONFIG_PPC_BOOK3S_32 u32 sr[16]; /* Guest SRs */ diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index d3c1eb34c986..dd8f61510dfd 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -26,7 +26,12 @@ /* LPIDs we support with this build -- runtime limit may be lower */ #define KVMPPC_NR_LPIDS 64 -#define KVMPPC_INST_EHPRIV 0x7c00021c +#define KVMPPC_INST_EHPRIV 0x7c00021c +#define EHPRIV_OC_SHIFT 11 +/* "ehpriv 1" : ehpriv with OC = 1 is used for debug emulation */ +#define EHPRIV_OC_DEBUG 1 +#define KVMPPC_INST_EHPRIV_DEBUG (KVMPPC_INST_EHPRIV | \ + (EHPRIV_OC_DEBUG << EHPRIV_OC_SHIFT)) static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 33283532e9d8..237d1d25b448 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -63,20 +63,17 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); #endif -/* We don't currently support large pages. */ -#define KVM_HPAGE_GFN_SHIFT(x) 0 -#define KVM_NR_PAGE_SIZES 1 -#define KVM_PAGES_PER_HPAGE(x) (1UL<<31) - #define HPTEG_CACHE_NUM (1 << 15) #define HPTEG_HASH_BITS_PTE 13 #define HPTEG_HASH_BITS_PTE_LONG 12 #define HPTEG_HASH_BITS_VPTE 13 #define HPTEG_HASH_BITS_VPTE_LONG 5 +#define HPTEG_HASH_BITS_VPTE_64K 11 #define HPTEG_HASH_NUM_PTE (1 << HPTEG_HASH_BITS_PTE) #define HPTEG_HASH_NUM_PTE_LONG (1 << HPTEG_HASH_BITS_PTE_LONG) #define HPTEG_HASH_NUM_VPTE (1 << HPTEG_HASH_BITS_VPTE) #define HPTEG_HASH_NUM_VPTE_LONG (1 << HPTEG_HASH_BITS_VPTE_LONG) +#define HPTEG_HASH_NUM_VPTE_64K (1 << HPTEG_HASH_BITS_VPTE_64K) /* Physical Address Mask - allowed range of real mode RAM access */ #define KVM_PAM 0x0fffffffffffffffULL @@ -89,6 +86,9 @@ struct lppaca; struct slb_shadow; struct dtl_entry; +struct kvmppc_vcpu_book3s; +struct kvmppc_book3s_shadow_vcpu; + struct kvm_vm_stat { u32 remote_tlb_flush; }; @@ -224,15 +224,15 @@ struct revmap_entry { #define KVMPPC_GOT_PAGE 0x80 struct kvm_arch_memory_slot { -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE unsigned long *rmap; unsigned long *slot_phys; -#endif /* CONFIG_KVM_BOOK3S_64_HV */ +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ }; struct kvm_arch { unsigned int lpid; -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE unsigned long hpt_virt; struct revmap_entry *revmap; unsigned int host_lpid; @@ -256,7 +256,10 @@ struct kvm_arch { cpumask_t need_tlb_flush; struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; int hpt_cma_alloc; -#endif /* CONFIG_KVM_BOOK3S_64_HV */ +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE + struct mutex hpt_mutex; +#endif #ifdef CONFIG_PPC_BOOK3S_64 struct list_head spapr_tce_tables; struct list_head rtas_tokens; @@ -267,6 +270,7 @@ struct kvm_arch { #ifdef CONFIG_KVM_XICS struct kvmppc_xics *xics; #endif + struct kvmppc_ops *kvm_ops; }; /* @@ -294,6 +298,10 @@ struct kvmppc_vcore { u64 stolen_tb; u64 preempt_tb; struct kvm_vcpu *runner; + u64 tb_offset; /* guest timebase - host timebase */ + ulong lpcr; + u32 arch_compat; + ulong pcr; }; #define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) @@ -328,6 +336,7 @@ struct kvmppc_pte { bool may_read : 1; bool may_write : 1; bool may_execute : 1; + u8 page_size; /* MMU_PAGE_xxx */ }; struct kvmppc_mmu { @@ -340,7 +349,8 @@ struct kvmppc_mmu { /* book3s */ void (*mtsrin)(struct kvm_vcpu *vcpu, u32 srnum, ulong value); u32 (*mfsrin)(struct kvm_vcpu *vcpu, u32 srnum); - int (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data); + int (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr, + struct kvmppc_pte *pte, bool data, bool iswrite); void (*reset_msr)(struct kvm_vcpu *vcpu); void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large); int (*esid_to_vsid)(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid); @@ -360,6 +370,7 @@ struct kvmppc_slb { bool large : 1; /* PTEs are 16MB */ bool tb : 1; /* 1TB segment */ bool class : 1; + u8 base_page_size; /* MMU_PAGE_xxx */ }; # ifdef CONFIG_PPC_FSL_BOOK3E @@ -377,17 +388,6 @@ struct kvmppc_slb { #define KVMPPC_EPR_USER 1 /* exit to userspace to fill EPR */ #define KVMPPC_EPR_KERNEL 2 /* in-kernel irqchip */ -struct kvmppc_booke_debug_reg { - u32 dbcr0; - u32 dbcr1; - u32 dbcr2; -#ifdef CONFIG_KVM_E500MC - u32 dbcr4; -#endif - u64 iac[KVMPPC_BOOKE_MAX_IAC]; - u64 dac[KVMPPC_BOOKE_MAX_DAC]; -}; - #define KVMPPC_IRQ_DEFAULT 0 #define KVMPPC_IRQ_MPIC 1 #define KVMPPC_IRQ_XICS 2 @@ -402,6 +402,10 @@ struct kvm_vcpu_arch { int slb_max; /* 1 + index of last valid entry in slb[] */ int slb_nr; /* total number of entries in SLB */ struct kvmppc_mmu mmu; + struct kvmppc_vcpu_book3s *book3s; +#endif +#ifdef CONFIG_PPC_BOOK3S_32 + struct kvmppc_book3s_shadow_vcpu *shadow_vcpu; #endif ulong gpr[32]; @@ -463,6 +467,8 @@ struct kvm_vcpu_arch { u32 ctrl; ulong dabr; ulong cfar; + ulong ppr; + ulong shadow_srr1; #endif u32 vrsave; /* also USPRG0 */ u32 mmucr; @@ -498,6 +504,8 @@ struct kvm_vcpu_arch { u64 mmcr[3]; u32 pmc[8]; + u64 siar; + u64 sdar; #ifdef CONFIG_KVM_EXIT_TIMING struct mutex exit_timing_lock; @@ -531,7 +539,10 @@ struct kvm_vcpu_arch { u32 eptcfg; u32 epr; u32 crit_save; - struct kvmppc_booke_debug_reg dbg_reg; + /* guest debug registers*/ + struct debug_reg dbg_reg; + /* hardware visible debug registers when in guest state */ + struct debug_reg shadow_dbg_reg; #endif gpa_t paddr_accessed; gva_t vaddr_accessed; @@ -582,7 +593,7 @@ struct kvm_vcpu_arch { struct kvmppc_icp *icp; /* XICS presentation controller */ #endif -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE struct kvm_vcpu_arch_shared shregs; unsigned long pgfault_addr; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index b15554a26c20..c8317fbf92c4 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -106,13 +106,6 @@ extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu); extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu); - -extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int op, int *advance); -extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, - ulong val); -extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, - ulong *val); extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu); extern int kvmppc_booke_init(void); @@ -135,17 +128,17 @@ extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce *args); extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce); -extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, - struct kvm_allocate_rma *rma); extern struct kvm_rma_info *kvm_alloc_rma(void); extern void kvm_release_rma(struct kvm_rma_info *ri); extern struct page *kvm_alloc_hpt(unsigned long nr_pages); extern void kvm_release_hpt(struct page *page, unsigned long nr_pages); extern int kvmppc_core_init_vm(struct kvm *kvm); extern void kvmppc_core_destroy_vm(struct kvm *kvm); -extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free, +extern void kvmppc_core_free_memslot(struct kvm *kvm, + struct kvm_memory_slot *free, struct kvm_memory_slot *dont); -extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, +extern int kvmppc_core_create_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot, unsigned long npages); extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, @@ -177,6 +170,72 @@ extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq); extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq); +union kvmppc_one_reg { + u32 wval; + u64 dval; + vector128 vval; + u64 vsxval[2]; + struct { + u64 addr; + u64 length; + } vpaval; +}; + +struct kvmppc_ops { + struct module *owner; + int (*get_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); + int (*set_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); + int (*get_one_reg)(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val); + int (*set_one_reg)(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val); + void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); + void (*vcpu_put)(struct kvm_vcpu *vcpu); + void (*set_msr)(struct kvm_vcpu *vcpu, u64 msr); + int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu); + struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned int id); + void (*vcpu_free)(struct kvm_vcpu *vcpu); + int (*check_requests)(struct kvm_vcpu *vcpu); + int (*get_dirty_log)(struct kvm *kvm, struct kvm_dirty_log *log); + void (*flush_memslot)(struct kvm *kvm, struct kvm_memory_slot *memslot); + int (*prepare_memory_region)(struct kvm *kvm, + struct kvm_memory_slot *memslot, + struct kvm_userspace_memory_region *mem); + void (*commit_memory_region)(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old); + int (*unmap_hva)(struct kvm *kvm, unsigned long hva); + int (*unmap_hva_range)(struct kvm *kvm, unsigned long start, + unsigned long end); + int (*age_hva)(struct kvm *kvm, unsigned long hva); + int (*test_age_hva)(struct kvm *kvm, unsigned long hva); + void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte); + void (*mmu_destroy)(struct kvm_vcpu *vcpu); + void (*free_memslot)(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont); + int (*create_memslot)(struct kvm_memory_slot *slot, + unsigned long npages); + int (*init_vm)(struct kvm *kvm); + void (*destroy_vm)(struct kvm *kvm); + int (*get_smmu_info)(struct kvm *kvm, struct kvm_ppc_smmu_info *info); + int (*emulate_op)(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance); + int (*emulate_mtspr)(struct kvm_vcpu *vcpu, int sprn, ulong spr_val); + int (*emulate_mfspr)(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val); + void (*fast_vcpu_kick)(struct kvm_vcpu *vcpu); + long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl, + unsigned long arg); + +}; + +extern struct kvmppc_ops *kvmppc_hv_ops; +extern struct kvmppc_ops *kvmppc_pr_ops; + +static inline bool is_kvmppc_hv_enabled(struct kvm *kvm) +{ + return kvm->arch.kvm_ops == kvmppc_hv_ops; +} + /* * Cuts out inst bits with ordering according to spec. * That means the leftmost bit is zero. All given bits are included. @@ -210,17 +269,6 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value) return r; } -union kvmppc_one_reg { - u32 wval; - u64 dval; - vector128 vval; - u64 vsxval[2]; - struct { - u64 addr; - u64 length; - } vpaval; -}; - #define one_reg_size(id) \ (1ul << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) @@ -245,10 +293,10 @@ union kvmppc_one_reg { __v; \ }) -void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); +int kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); -void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); +int kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg); @@ -260,7 +308,7 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); struct openpic; -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE extern void kvm_cma_reserve(void) __init; static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) { @@ -269,10 +317,10 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) static inline u32 kvmppc_get_xics_latch(void) { - u32 xirr = get_paca()->kvm_hstate.saved_xirr; + u32 xirr; + xirr = get_paca()->kvm_hstate.saved_xirr; get_paca()->kvm_hstate.saved_xirr = 0; - return xirr; } @@ -281,7 +329,10 @@ static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) paca[cpu].kvm_hstate.host_ipi = host_ipi; } -extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu); +static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) +{ + vcpu->kvm->arch.kvm_ops->fast_vcpu_kick(vcpu); +} #else static inline void __init kvm_cma_reserve(void) diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index a5954cebbc55..b6ea9e068c13 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -166,7 +166,7 @@ struct paca_struct { struct dtl_entry *dtl_curr; /* pointer corresponding to dtl_ridx */ #ifdef CONFIG_KVM_BOOK3S_HANDLER -#ifdef CONFIG_KVM_BOOK3S_PR +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE /* We use this to store guest state in */ struct kvmppc_book3s_shadow_vcpu shadow_vcpu; #endif diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index e378cccfca55..75a9e5a34ef9 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -147,22 +147,7 @@ typedef struct { #define TS_FPR(i) fpr[i][TS_FPROFFSET] #define TS_TRANS_FPR(i) transact_fpr[i][TS_FPROFFSET] -struct thread_struct { - unsigned long ksp; /* Kernel stack pointer */ - unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */ - -#ifdef CONFIG_PPC64 - unsigned long ksp_vsid; -#endif - struct pt_regs *regs; /* Pointer to saved register state */ - mm_segment_t fs; /* for get_fs() validation */ -#ifdef CONFIG_BOOKE - /* BookE base exception scratch space; align on cacheline */ - unsigned long normsave[8] ____cacheline_aligned; -#endif -#ifdef CONFIG_PPC32 - void *pgdir; /* root of page-table tree */ -#endif +struct debug_reg { #ifdef CONFIG_PPC_ADV_DEBUG_REGS /* * The following help to manage the use of Debug Control Registers @@ -199,6 +184,27 @@ struct thread_struct { unsigned long dvc2; #endif #endif +}; + +struct thread_struct { + unsigned long ksp; /* Kernel stack pointer */ + +#ifdef CONFIG_PPC64 + unsigned long ksp_vsid; +#endif + struct pt_regs *regs; /* Pointer to saved register state */ + mm_segment_t fs; /* for get_fs() validation */ +#ifdef CONFIG_BOOKE + /* BookE base exception scratch space; align on cacheline */ + unsigned long normsave[8] ____cacheline_aligned; +#endif +#ifdef CONFIG_PPC32 + void *pgdir; /* root of page-table tree */ + unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */ +#endif + /* Debug Registers */ + struct debug_reg debug; + /* FP and VSX 0-31 register set */ double fpr[32][TS_FPRWIDTH] __attribute__((aligned(16))); struct { @@ -321,7 +327,6 @@ struct thread_struct { #else #define INIT_THREAD { \ .ksp = INIT_SP, \ - .ksp_limit = INIT_SP_LIMIT, \ .regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \ .fs = KERNEL_DS, \ .fpr = {{0}}, \ diff --git a/arch/powerpc/include/asm/pte-book3e.h b/arch/powerpc/include/asm/pte-book3e.h index 0156702ba24e..576ad88104cb 100644 --- a/arch/powerpc/include/asm/pte-book3e.h +++ b/arch/powerpc/include/asm/pte-book3e.h @@ -40,7 +40,7 @@ #define _PAGE_U1 0x010000 #define _PAGE_U0 0x020000 #define _PAGE_ACCESSED 0x040000 -#define _PAGE_LENDIAN 0x080000 +#define _PAGE_ENDIAN 0x080000 #define _PAGE_GUARDED 0x100000 #define _PAGE_COHERENT 0x200000 /* M: enforce memory coherence */ #define _PAGE_NO_CACHE 0x400000 /* I: cache inhibit */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 10d1ef016bf1..e294673e9d4b 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -243,6 +243,7 @@ #define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */ #define SPRN_TBWL 0x11C /* Time Base Lower Register (super, R/W) */ #define SPRN_TBWU 0x11D /* Time Base Upper Register (super, R/W) */ +#define SPRN_TBU40 0x11E /* Timebase upper 40 bits (hyper, R/W) */ #define SPRN_SPURR 0x134 /* Scaled PURR */ #define SPRN_HSPRG0 0x130 /* Hypervisor Scratch 0 */ #define SPRN_HSPRG1 0x131 /* Hypervisor Scratch 1 */ @@ -283,6 +284,7 @@ #define LPCR_ISL (1ul << (63-2)) #define LPCR_VC_SH (63-2) #define LPCR_DPFD_SH (63-11) +#define LPCR_DPFD (7ul << LPCR_DPFD_SH) #define LPCR_VRMASD (0x1ful << (63-16)) #define LPCR_VRMA_L (1ul << (63-12)) #define LPCR_VRMA_LP0 (1ul << (63-15)) @@ -299,6 +301,7 @@ #define LPCR_PECE2 0x00001000 /* machine check etc can cause exit */ #define LPCR_MER 0x00000800 /* Mediated External Exception */ #define LPCR_MER_SH 11 +#define LPCR_TC 0x00000200 /* Translation control */ #define LPCR_LPES 0x0000000c #define LPCR_LPES0 0x00000008 /* LPAR Env selector 0 */ #define LPCR_LPES1 0x00000004 /* LPAR Env selector 1 */ @@ -311,6 +314,10 @@ #define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */ #define SPRN_HMER 0x150 /* Hardware m? error recovery */ #define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */ +#define SPRN_PCR 0x152 /* Processor compatibility register */ +#define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */ +#define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */ +#define PCR_ARCH_205 0x2 /* Architecture 2.05 */ #define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */ #define SPRN_TLBINDEXR 0x154 /* P7 TLB control register */ #define SPRN_TLBVPNR 0x155 /* P7 TLB control register */ @@ -420,6 +427,7 @@ #define HID4_RMLS2_SH (63 - 2) /* Real mode limit bottom 2 bits */ #define HID4_LPID5_SH (63 - 6) /* partition ID bottom 4 bits */ #define HID4_RMOR_SH (63 - 22) /* real mode offset (16 bits) */ +#define HID4_RMOR (0xFFFFul << HID4_RMOR_SH) #define HID4_LPES1 (1 << (63-57)) /* LPAR env. sel. bit 1 */ #define HID4_RMLS0_SH (63 - 58) /* Real mode limit top bit */ #define HID4_LPID1_SH 0 /* partition ID top 2 bits */ @@ -1102,6 +1110,13 @@ #define PVR_BE 0x0070 #define PVR_PA6T 0x0090 +/* "Logical" PVR values defined in PAPR, representing architecture levels */ +#define PVR_ARCH_204 0x0f000001 +#define PVR_ARCH_205 0x0f000002 +#define PVR_ARCH_206 0x0f000003 +#define PVR_ARCH_206p 0x0f100003 +#define PVR_ARCH_207 0x0f000004 + /* Macros for setting and retrieving special purpose registers */ #ifndef __ASSEMBLY__ #define mfmsr() ({unsigned long rval; \ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index ed8f836da094..2e31aacd8acc 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -381,7 +381,7 @@ #define DBCR0_IA34T 0x00004000 /* Instr Addr 3-4 range Toggle */ #define DBCR0_FT 0x00000001 /* Freeze Timers on debug event */ -#define dbcr_iac_range(task) ((task)->thread.dbcr0) +#define dbcr_iac_range(task) ((task)->thread.debug.dbcr0) #define DBCR_IAC12I DBCR0_IA12 /* Range Inclusive */ #define DBCR_IAC12X (DBCR0_IA12 | DBCR0_IA12X) /* Range Exclusive */ #define DBCR_IAC12MODE (DBCR0_IA12 | DBCR0_IA12X) /* IAC 1-2 Mode Bits */ @@ -395,7 +395,7 @@ #define DBCR1_DAC1W 0x20000000 /* DAC1 Write Debug Event */ #define DBCR1_DAC2W 0x10000000 /* DAC2 Write Debug Event */ -#define dbcr_dac(task) ((task)->thread.dbcr1) +#define dbcr_dac(task) ((task)->thread.debug.dbcr1) #define DBCR_DAC1R DBCR1_DAC1R #define DBCR_DAC1W DBCR1_DAC1W #define DBCR_DAC2R DBCR1_DAC2R @@ -441,7 +441,7 @@ #define DBCR0_CRET 0x00000020 /* Critical Return Debug Event */ #define DBCR0_FT 0x00000001 /* Freeze Timers on debug event */ -#define dbcr_dac(task) ((task)->thread.dbcr0) +#define dbcr_dac(task) ((task)->thread.debug.dbcr0) #define DBCR_DAC1R DBCR0_DAC1R #define DBCR_DAC1W DBCR0_DAC1W #define DBCR_DAC2R DBCR0_DAC2R @@ -475,7 +475,7 @@ #define DBCR1_IAC34MX 0x000000C0 /* Instr Addr 3-4 range eXclusive */ #define DBCR1_IAC34AT 0x00000001 /* Instr Addr 3-4 range Toggle */ -#define dbcr_iac_range(task) ((task)->thread.dbcr1) +#define dbcr_iac_range(task) ((task)->thread.debug.dbcr1) #define DBCR_IAC12I DBCR1_IAC12M /* Range Inclusive */ #define DBCR_IAC12X DBCR1_IAC12MX /* Range Exclusive */ #define DBCR_IAC12MODE DBCR1_IAC12MX /* IAC 1-2 Mode Bits */ diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 2be5618cdec6..9ee12610af02 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -35,6 +35,7 @@ extern void giveup_vsx(struct task_struct *); extern void enable_kernel_spe(void); extern void giveup_spe(struct task_struct *); extern void load_up_spe(struct task_struct *); +extern void switch_booke_debug_regs(struct thread_struct *new_thread); #ifndef CONFIG_SMP extern void discard_lazy_cpu_state(void); diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 0fb1a6e9ff90..6836ec79a830 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -27,6 +27,7 @@ #define __KVM_HAVE_PPC_SMT #define __KVM_HAVE_IRQCHIP #define __KVM_HAVE_IRQ_LINE +#define __KVM_HAVE_GUEST_DEBUG struct kvm_regs { __u64 pc; @@ -269,7 +270,24 @@ struct kvm_fpu { __u64 fpr[32]; }; +/* + * Defines for h/w breakpoint, watchpoint (read, write or both) and + * software breakpoint. + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status" + * for KVM_DEBUG_EXIT. + */ +#define KVMPPC_DEBUG_NONE 0x0 +#define KVMPPC_DEBUG_BREAKPOINT (1UL << 1) +#define KVMPPC_DEBUG_WATCH_WRITE (1UL << 2) +#define KVMPPC_DEBUG_WATCH_READ (1UL << 3) struct kvm_debug_exit_arch { + __u64 address; + /* + * exiting to userspace because of h/w breakpoint, watchpoint + * (read, write or both) and software breakpoint. + */ + __u32 status; + __u32 reserved; }; /* for KVM_SET_GUEST_DEBUG */ @@ -281,10 +299,6 @@ struct kvm_guest_debug_arch { * Type denotes h/w breakpoint, read watchpoint, write * watchpoint or watchpoint (both read and write). */ -#define KVMPPC_DEBUG_NONE 0x0 -#define KVMPPC_DEBUG_BREAKPOINT (1UL << 1) -#define KVMPPC_DEBUG_WATCH_WRITE (1UL << 2) -#define KVMPPC_DEBUG_WATCH_READ (1UL << 3) __u32 type; __u32 reserved; } bp[16]; @@ -429,6 +443,11 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_MMCR0 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x10) #define KVM_REG_PPC_MMCR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x11) #define KVM_REG_PPC_MMCRA (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x12) +#define KVM_REG_PPC_MMCR2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x13) +#define KVM_REG_PPC_MMCRS (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x14) +#define KVM_REG_PPC_SIAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x15) +#define KVM_REG_PPC_SDAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x16) +#define KVM_REG_PPC_SIER (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x17) #define KVM_REG_PPC_PMC1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x18) #define KVM_REG_PPC_PMC2 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x19) @@ -499,6 +518,65 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a) #define KVM_REG_PPC_EPTCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b) +/* Timebase offset */ +#define KVM_REG_PPC_TB_OFFSET (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9c) + +/* POWER8 registers */ +#define KVM_REG_PPC_SPMC1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9d) +#define KVM_REG_PPC_SPMC2 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9e) +#define KVM_REG_PPC_IAMR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9f) +#define KVM_REG_PPC_TFHAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa0) +#define KVM_REG_PPC_TFIAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa1) +#define KVM_REG_PPC_TEXASR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa2) +#define KVM_REG_PPC_FSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa3) +#define KVM_REG_PPC_PSPB (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xa4) +#define KVM_REG_PPC_EBBHR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa5) +#define KVM_REG_PPC_EBBRR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa6) +#define KVM_REG_PPC_BESCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa7) +#define KVM_REG_PPC_TAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa8) +#define KVM_REG_PPC_DPDES (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa9) +#define KVM_REG_PPC_DAWR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xaa) +#define KVM_REG_PPC_DAWRX (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xab) +#define KVM_REG_PPC_CIABR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xac) +#define KVM_REG_PPC_IC (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xad) +#define KVM_REG_PPC_VTB (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xae) +#define KVM_REG_PPC_CSIGR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xaf) +#define KVM_REG_PPC_TACR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb0) +#define KVM_REG_PPC_TCSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb1) +#define KVM_REG_PPC_PID (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb2) +#define KVM_REG_PPC_ACOP (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb3) + +#define KVM_REG_PPC_VRSAVE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb4) +#define KVM_REG_PPC_LPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb5) +#define KVM_REG_PPC_PPR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb6) + +/* Architecture compatibility level */ +#define KVM_REG_PPC_ARCH_COMPAT (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb7) + +/* Transactional Memory checkpointed state: + * This is all GPRs, all VSX regs and a subset of SPRs + */ +#define KVM_REG_PPC_TM (KVM_REG_PPC | 0x80000000) +/* TM GPRs */ +#define KVM_REG_PPC_TM_GPR0 (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0) +#define KVM_REG_PPC_TM_GPR(n) (KVM_REG_PPC_TM_GPR0 + (n)) +#define KVM_REG_PPC_TM_GPR31 (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x1f) +/* TM VSX */ +#define KVM_REG_PPC_TM_VSR0 (KVM_REG_PPC_TM | KVM_REG_SIZE_U128 | 0x20) +#define KVM_REG_PPC_TM_VSR(n) (KVM_REG_PPC_TM_VSR0 + (n)) +#define KVM_REG_PPC_TM_VSR63 (KVM_REG_PPC_TM | KVM_REG_SIZE_U128 | 0x5f) +/* TM SPRS */ +#define KVM_REG_PPC_TM_CR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x60) +#define KVM_REG_PPC_TM_LR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x61) +#define KVM_REG_PPC_TM_CTR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x62) +#define KVM_REG_PPC_TM_FPSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x63) +#define KVM_REG_PPC_TM_AMR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x64) +#define KVM_REG_PPC_TM_PPR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x65) +#define KVM_REG_PPC_TM_VRSAVE (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x66) +#define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67) +#define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68) +#define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69) + /* PPC64 eXternal Interrupt Controller Specification */ #define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index d8958be5f31a..479b036d36d7 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -80,10 +80,11 @@ int main(void) DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr)); #else DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); + DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16)); + DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit)); #endif /* CONFIG_PPC64 */ DEFINE(KSP, offsetof(struct thread_struct, ksp)); - DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit)); DEFINE(PT_REGS, offsetof(struct thread_struct, regs)); #ifdef CONFIG_BOOKE DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0])); @@ -113,7 +114,7 @@ int main(void) #endif /* CONFIG_SPE */ #endif /* CONFIG_PPC64 */ #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0)); + DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, debug.dbcr0)); #endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); @@ -445,7 +446,7 @@ int main(void) DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.shregs.msr)); DEFINE(VCPU_SRR0, offsetof(struct kvm_vcpu, arch.shregs.srr0)); DEFINE(VCPU_SRR1, offsetof(struct kvm_vcpu, arch.shregs.srr1)); @@ -476,7 +477,7 @@ int main(void) DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid)); /* book3s */ -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1)); DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); @@ -508,6 +509,8 @@ int main(void) DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded)); DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); + DEFINE(VCPU_SIAR, offsetof(struct kvm_vcpu, arch.siar)); + DEFINE(VCPU_SDAR, offsetof(struct kvm_vcpu, arch.sdar)); DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max)); DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr)); @@ -517,18 +520,22 @@ int main(void) DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid)); DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar)); + DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr)); + DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1)); DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count)); DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count)); DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads)); - DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - - offsetof(struct kvmppc_vcpu_book3s, vcpu)); + DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset)); + DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr)); + DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr)); DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv)); DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb)); #ifdef CONFIG_PPC_BOOK3S_64 -#ifdef CONFIG_KVM_BOOK3S_PR +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE + DEFINE(PACA_SVCPU, offsetof(struct paca_struct, shadow_vcpu)); # define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f)) #else # define SVCPU_FIELD(x, f) @@ -580,7 +587,7 @@ int main(void) HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5); HSTATE_FIELD(HSTATE_NAPPING, napping); -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req); HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state); HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); @@ -596,10 +603,11 @@ int main(void) HSTATE_FIELD(HSTATE_DABR, dabr); HSTATE_FIELD(HSTATE_DECEXP, dec_expires); DEFINE(IPI_PRIORITY, IPI_PRIORITY); -#endif /* CONFIG_KVM_BOOK3S_64_HV */ +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #ifdef CONFIG_PPC_BOOK3S_64 HSTATE_FIELD(HSTATE_CFAR, cfar); + HSTATE_FIELD(HSTATE_PPR, ppr); #endif /* CONFIG_PPC_BOOK3S_64 */ #else /* CONFIG_PPC_BOOK3S */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 3a9ed6ac224b..9f905e40922e 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -126,7 +126,7 @@ BEGIN_FTR_SECTION bgt cr1,. GET_PACA(r13) -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE li r0,KVM_HWTHREAD_IN_KERNEL stb r0,HSTATE_HWTHREAD_STATE(r13) /* Order setting hwthread_state vs. testing hwthread_req */ @@ -425,7 +425,7 @@ data_access_check_stab: mfspr r9,SPRN_DSISR srdi r10,r10,60 rlwimi r10,r9,16,0x20 -#ifdef CONFIG_KVM_BOOK3S_PR +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE lbz r9,HSTATE_IN_GUEST(r13) rlwimi r10,r9,8,0x300 #endif @@ -650,6 +650,32 @@ slb_miss_user_pseries: b . /* prevent spec. execution */ #endif /* __DISABLED__ */ +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER +kvmppc_skip_interrupt: + /* + * Here all GPRs are unchanged from when the interrupt happened + * except for r13, which is saved in SPRG_SCRATCH0. + */ + mfspr r13, SPRN_SRR0 + addi r13, r13, 4 + mtspr SPRN_SRR0, r13 + GET_SCRATCH0(r13) + rfid + b . + +kvmppc_skip_Hinterrupt: + /* + * Here all GPRs are unchanged from when the interrupt happened + * except for r13, which is saved in SPRG_SCRATCH0. + */ + mfspr r13, SPRN_HSRR0 + addi r13, r13, 4 + mtspr SPRN_HSRR0, r13 + GET_SCRATCH0(r13) + hrfid + b . +#endif + /* * Code from here down to __end_handlers is invoked from the * exception prologs above. Because the prologs assemble the diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index e11863f4e595..847e40e62fce 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -84,7 +84,7 @@ _GLOBAL(power7_nap) std r9,_MSR(r1) std r1,PACAR1(r13) -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* Tell KVM we're napping */ li r4,KVM_HWTHREAD_IN_NAP stb r4,HSTATE_HWTHREAD_STATE(r13) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 0adab06ce5c0..572bb5b95f35 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -661,7 +661,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) /* number of bytes needed for the bitmap */ sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long); - page = alloc_pages_node(nid, GFP_ATOMIC, get_order(sz)); + page = alloc_pages_node(nid, GFP_KERNEL, get_order(sz)); if (!page) panic("iommu_init_table: Can't allocate %ld bytes\n", sz); tbl->it_map = page_address(page); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index c69440cef7af..c7cb8c232d2f 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -441,50 +441,6 @@ void migrate_irqs(void) } #endif -static inline void handle_one_irq(unsigned int irq) -{ - struct thread_info *curtp, *irqtp; - unsigned long saved_sp_limit; - struct irq_desc *desc; - - desc = irq_to_desc(irq); - if (!desc) - return; - - /* Switch to the irq stack to handle this */ - curtp = current_thread_info(); - irqtp = hardirq_ctx[smp_processor_id()]; - - if (curtp == irqtp) { - /* We're already on the irq stack, just handle it */ - desc->handle_irq(irq, desc); - return; - } - - saved_sp_limit = current->thread.ksp_limit; - - irqtp->task = curtp->task; - irqtp->flags = 0; - - /* Copy the softirq bits in preempt_count so that the - * softirq checks work in the hardirq context. */ - irqtp->preempt_count = (irqtp->preempt_count & ~SOFTIRQ_MASK) | - (curtp->preempt_count & SOFTIRQ_MASK); - - current->thread.ksp_limit = (unsigned long)irqtp + - _ALIGN_UP(sizeof(struct thread_info), 16); - - call_handle_irq(irq, desc, irqtp, desc->handle_irq); - current->thread.ksp_limit = saved_sp_limit; - irqtp->task = NULL; - - /* Set any flag that may have been set on the - * alternate stack - */ - if (irqtp->flags) - set_bits(irqtp->flags, &curtp->flags); -} - static inline void check_stack_overflow(void) { #ifdef CONFIG_DEBUG_STACKOVERFLOW @@ -501,9 +457,9 @@ static inline void check_stack_overflow(void) #endif } -void do_IRQ(struct pt_regs *regs) +void __do_irq(struct pt_regs *regs) { - struct pt_regs *old_regs = set_irq_regs(regs); + struct irq_desc *desc; unsigned int irq; irq_enter(); @@ -519,18 +475,57 @@ void do_IRQ(struct pt_regs *regs) */ irq = ppc_md.get_irq(); - /* We can hard enable interrupts now */ + /* We can hard enable interrupts now to allow perf interrupts */ may_hard_irq_enable(); /* And finally process it */ - if (irq != NO_IRQ) - handle_one_irq(irq); - else + if (unlikely(irq == NO_IRQ)) __get_cpu_var(irq_stat).spurious_irqs++; + else { + desc = irq_to_desc(irq); + if (likely(desc)) + desc->handle_irq(irq, desc); + } trace_irq_exit(regs); irq_exit(); +} + +void do_IRQ(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + struct thread_info *curtp, *irqtp, *sirqtp; + + /* Switch to the irq stack to handle this */ + curtp = current_thread_info(); + irqtp = hardirq_ctx[raw_smp_processor_id()]; + sirqtp = softirq_ctx[raw_smp_processor_id()]; + + /* Already there ? */ + if (unlikely(curtp == irqtp || curtp == sirqtp)) { + __do_irq(regs); + set_irq_regs(old_regs); + return; + } + + /* Prepare the thread_info in the irq stack */ + irqtp->task = curtp->task; + irqtp->flags = 0; + + /* Copy the preempt_count so that the [soft]irq checks work. */ + irqtp->preempt_count = curtp->preempt_count; + + /* Switch stack and call */ + call_do_irq(regs, irqtp); + + /* Restore stack limit */ + irqtp->task = NULL; + + /* Copy back updates to the thread_info */ + if (irqtp->flags) + set_bits(irqtp->flags, &curtp->flags); + set_irq_regs(old_regs); } @@ -592,28 +587,22 @@ void irq_ctx_init(void) memset((void *)softirq_ctx[i], 0, THREAD_SIZE); tp = softirq_ctx[i]; tp->cpu = i; - tp->preempt_count = 0; memset((void *)hardirq_ctx[i], 0, THREAD_SIZE); tp = hardirq_ctx[i]; tp->cpu = i; - tp->preempt_count = HARDIRQ_OFFSET; } } static inline void do_softirq_onstack(void) { struct thread_info *curtp, *irqtp; - unsigned long saved_sp_limit = current->thread.ksp_limit; curtp = current_thread_info(); irqtp = softirq_ctx[smp_processor_id()]; irqtp->task = curtp->task; irqtp->flags = 0; - current->thread.ksp_limit = (unsigned long)irqtp + - _ALIGN_UP(sizeof(struct thread_info), 16); call_do_softirq(irqtp); - current->thread.ksp_limit = saved_sp_limit; irqtp->task = NULL; /* Set any flag that may have been set on the diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 777d999f563b..2b0ad9845363 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -36,26 +36,41 @@ .text +/* + * We store the saved ksp_limit in the unused part + * of the STACK_FRAME_OVERHEAD + */ _GLOBAL(call_do_softirq) mflr r0 stw r0,4(r1) + lwz r10,THREAD+KSP_LIMIT(r2) + addi r11,r3,THREAD_INFO_GAP stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) mr r1,r3 + stw r10,8(r1) + stw r11,THREAD+KSP_LIMIT(r2) bl __do_softirq + lwz r10,8(r1) lwz r1,0(r1) lwz r0,4(r1) + stw r10,THREAD+KSP_LIMIT(r2) mtlr r0 blr -_GLOBAL(call_handle_irq) +_GLOBAL(call_do_irq) mflr r0 stw r0,4(r1) - mtctr r6 - stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5) - mr r1,r5 - bctrl + lwz r10,THREAD+KSP_LIMIT(r2) + addi r11,r3,THREAD_INFO_GAP + stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) + mr r1,r4 + stw r10,8(r1) + stw r11,THREAD+KSP_LIMIT(r2) + bl __do_irq + lwz r10,8(r1) lwz r1,0(r1) lwz r0,4(r1) + stw r10,THREAD+KSP_LIMIT(r2) mtlr r0 blr diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 971d7e78aff2..e59caf874d05 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -40,14 +40,12 @@ _GLOBAL(call_do_softirq) mtlr r0 blr -_GLOBAL(call_handle_irq) - ld r8,0(r6) +_GLOBAL(call_do_irq) mflr r0 std r0,16(r1) - mtctr r8 - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5) - mr r1,r5 - bctrl + stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) + mr r1,r4 + bl .__do_irq ld r1,0(r1) ld r0,16(r1) mtlr r0 diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 6f428da53e20..ec5ae55ba36a 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -314,28 +314,28 @@ static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk); */ static void set_debug_reg_defaults(struct thread_struct *thread) { - thread->iac1 = thread->iac2 = 0; + thread->debug.iac1 = thread->debug.iac2 = 0; #if CONFIG_PPC_ADV_DEBUG_IACS > 2 - thread->iac3 = thread->iac4 = 0; + thread->debug.iac3 = thread->debug.iac4 = 0; #endif - thread->dac1 = thread->dac2 = 0; + thread->debug.dac1 = thread->debug.dac2 = 0; #if CONFIG_PPC_ADV_DEBUG_DVCS > 0 - thread->dvc1 = thread->dvc2 = 0; + thread->debug.dvc1 = thread->debug.dvc2 = 0; #endif - thread->dbcr0 = 0; + thread->debug.dbcr0 = 0; #ifdef CONFIG_BOOKE /* * Force User/Supervisor bits to b11 (user-only MSR[PR]=1) */ - thread->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | \ + thread->debug.dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US | DBCR1_IAC4US; /* * Force Data Address Compare User/Supervisor bits to be User-only * (0b11 MSR[PR]=1) and set all other bits in DBCR2 register to be 0. */ - thread->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; + thread->debug.dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; #else - thread->dbcr1 = 0; + thread->debug.dbcr1 = 0; #endif } @@ -348,22 +348,22 @@ static void prime_debug_regs(struct thread_struct *thread) */ mtmsr(mfmsr() & ~MSR_DE); - mtspr(SPRN_IAC1, thread->iac1); - mtspr(SPRN_IAC2, thread->iac2); + mtspr(SPRN_IAC1, thread->debug.iac1); + mtspr(SPRN_IAC2, thread->debug.iac2); #if CONFIG_PPC_ADV_DEBUG_IACS > 2 - mtspr(SPRN_IAC3, thread->iac3); - mtspr(SPRN_IAC4, thread->iac4); + mtspr(SPRN_IAC3, thread->debug.iac3); + mtspr(SPRN_IAC4, thread->debug.iac4); #endif - mtspr(SPRN_DAC1, thread->dac1); - mtspr(SPRN_DAC2, thread->dac2); + mtspr(SPRN_DAC1, thread->debug.dac1); + mtspr(SPRN_DAC2, thread->debug.dac2); #if CONFIG_PPC_ADV_DEBUG_DVCS > 0 - mtspr(SPRN_DVC1, thread->dvc1); - mtspr(SPRN_DVC2, thread->dvc2); + mtspr(SPRN_DVC1, thread->debug.dvc1); + mtspr(SPRN_DVC2, thread->debug.dvc2); #endif - mtspr(SPRN_DBCR0, thread->dbcr0); - mtspr(SPRN_DBCR1, thread->dbcr1); + mtspr(SPRN_DBCR0, thread->debug.dbcr0); + mtspr(SPRN_DBCR1, thread->debug.dbcr1); #ifdef CONFIG_BOOKE - mtspr(SPRN_DBCR2, thread->dbcr2); + mtspr(SPRN_DBCR2, thread->debug.dbcr2); #endif } /* @@ -371,12 +371,13 @@ static void prime_debug_regs(struct thread_struct *thread) * debug registers, set the debug registers from the values * stored in the new thread. */ -static void switch_booke_debug_regs(struct thread_struct *new_thread) +void switch_booke_debug_regs(struct thread_struct *new_thread) { - if ((current->thread.dbcr0 & DBCR0_IDM) - || (new_thread->dbcr0 & DBCR0_IDM)) + if ((current->thread.debug.dbcr0 & DBCR0_IDM) + || (new_thread->debug.dbcr0 & DBCR0_IDM)) prime_debug_regs(new_thread); } +EXPORT_SYMBOL_GPL(switch_booke_debug_regs); #else /* !CONFIG_PPC_ADV_DEBUG_REGS */ #ifndef CONFIG_HAVE_HW_BREAKPOINT static void set_debug_reg_defaults(struct thread_struct *thread) @@ -1000,9 +1001,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, kregs = (struct pt_regs *) sp; sp -= STACK_FRAME_OVERHEAD; p->thread.ksp = sp; +#ifdef CONFIG_PPC32 p->thread.ksp_limit = (unsigned long)task_stack_page(p) + _ALIGN_UP(sizeof(struct thread_info), 16); - +#endif #ifdef CONFIG_HAVE_HW_BREAKPOINT p->thread.ptrace_bps[0] = NULL; #endif diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 12e656ffe60e..5fe2842e8bab 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -196,6 +196,8 @@ static int __initdata mem_reserve_cnt; static cell_t __initdata regbuf[1024]; +static bool rtas_has_query_cpu_stopped; + /* * Error results ... some OF calls will return "-1" on error, some @@ -1574,6 +1576,11 @@ static void __init prom_instantiate_rtas(void) prom_setprop(rtas_node, "/rtas", "linux,rtas-entry", &val, sizeof(val)); + /* Check if it supports "query-cpu-stopped-state" */ + if (prom_getprop(rtas_node, "query-cpu-stopped-state", + &val, sizeof(val)) != PROM_ERROR) + rtas_has_query_cpu_stopped = true; + #if defined(CONFIG_PPC_POWERNV) && defined(__BIG_ENDIAN__) /* PowerVN takeover hack */ prom_rtas_data = base; @@ -1815,6 +1822,18 @@ static void __init prom_hold_cpus(void) = (void *) LOW_ADDR(__secondary_hold_acknowledge); unsigned long secondary_hold = LOW_ADDR(__secondary_hold); + /* + * On pseries, if RTAS supports "query-cpu-stopped-state", + * we skip this stage, the CPUs will be started by the + * kernel using RTAS. + */ + if ((of_platform == PLATFORM_PSERIES || + of_platform == PLATFORM_PSERIES_LPAR) && + rtas_has_query_cpu_stopped) { + prom_printf("prom_hold_cpus: skipped\n"); + return; + } + prom_debug("prom_hold_cpus: start...\n"); prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop); prom_debug(" 1) *spinloop = 0x%x\n", *spinloop); @@ -3011,6 +3030,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, * On non-powermacs, put all CPUs in spin-loops. * * PowerMacs use a different mechanism to spin CPUs + * + * (This must be done after instanciating RTAS) */ if (of_platform != PLATFORM_POWERMAC && of_platform != PLATFORM_OPAL) diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 9a0d24c390a3..ddaf1780879c 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -854,8 +854,8 @@ void user_enable_single_step(struct task_struct *task) if (regs != NULL) { #ifdef CONFIG_PPC_ADV_DEBUG_REGS - task->thread.dbcr0 &= ~DBCR0_BT; - task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC; + task->thread.debug.dbcr0 &= ~DBCR0_BT; + task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC; regs->msr |= MSR_DE; #else regs->msr &= ~MSR_BE; @@ -871,8 +871,8 @@ void user_enable_block_step(struct task_struct *task) if (regs != NULL) { #ifdef CONFIG_PPC_ADV_DEBUG_REGS - task->thread.dbcr0 &= ~DBCR0_IC; - task->thread.dbcr0 = DBCR0_IDM | DBCR0_BT; + task->thread.debug.dbcr0 &= ~DBCR0_IC; + task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT; regs->msr |= MSR_DE; #else regs->msr &= ~MSR_SE; @@ -894,16 +894,16 @@ void user_disable_single_step(struct task_struct *task) * And, after doing so, if all debug flags are off, turn * off DBCR0(IDM) and MSR(DE) .... Torez */ - task->thread.dbcr0 &= ~DBCR0_IC; + task->thread.debug.dbcr0 &= ~DBCR0_IC; /* * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set. */ - if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0, - task->thread.dbcr1)) { + if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0, + task->thread.debug.dbcr1)) { /* * All debug events were off..... */ - task->thread.dbcr0 &= ~DBCR0_IDM; + task->thread.debug.dbcr0 &= ~DBCR0_IDM; regs->msr &= ~MSR_DE; } #else @@ -1022,14 +1022,14 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, */ /* DAC's hold the whole address without any mode flags */ - task->thread.dac1 = data & ~0x3UL; + task->thread.debug.dac1 = data & ~0x3UL; - if (task->thread.dac1 == 0) { + if (task->thread.debug.dac1 == 0) { dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W); - if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0, - task->thread.dbcr1)) { + if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0, + task->thread.debug.dbcr1)) { task->thread.regs->msr &= ~MSR_DE; - task->thread.dbcr0 &= ~DBCR0_IDM; + task->thread.debug.dbcr0 &= ~DBCR0_IDM; } return 0; } @@ -1041,7 +1041,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0 register */ - task->thread.dbcr0 |= DBCR0_IDM; + task->thread.debug.dbcr0 |= DBCR0_IDM; /* Check for write and read flags and set DBCR0 accordingly */ @@ -1071,10 +1071,10 @@ static long set_instruction_bp(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) { int slot; - int slot1_in_use = ((child->thread.dbcr0 & DBCR0_IAC1) != 0); - int slot2_in_use = ((child->thread.dbcr0 & DBCR0_IAC2) != 0); - int slot3_in_use = ((child->thread.dbcr0 & DBCR0_IAC3) != 0); - int slot4_in_use = ((child->thread.dbcr0 & DBCR0_IAC4) != 0); + int slot1_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC1) != 0); + int slot2_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC2) != 0); + int slot3_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC3) != 0); + int slot4_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC4) != 0); if (dbcr_iac_range(child) & DBCR_IAC12MODE) slot2_in_use = 1; @@ -1093,9 +1093,9 @@ static long set_instruction_bp(struct task_struct *child, /* We need a pair of IAC regsisters */ if ((!slot1_in_use) && (!slot2_in_use)) { slot = 1; - child->thread.iac1 = bp_info->addr; - child->thread.iac2 = bp_info->addr2; - child->thread.dbcr0 |= DBCR0_IAC1; + child->thread.debug.iac1 = bp_info->addr; + child->thread.debug.iac2 = bp_info->addr2; + child->thread.debug.dbcr0 |= DBCR0_IAC1; if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) dbcr_iac_range(child) |= DBCR_IAC12X; @@ -1104,9 +1104,9 @@ static long set_instruction_bp(struct task_struct *child, #if CONFIG_PPC_ADV_DEBUG_IACS > 2 } else if ((!slot3_in_use) && (!slot4_in_use)) { slot = 3; - child->thread.iac3 = bp_info->addr; - child->thread.iac4 = bp_info->addr2; - child->thread.dbcr0 |= DBCR0_IAC3; + child->thread.debug.iac3 = bp_info->addr; + child->thread.debug.iac4 = bp_info->addr2; + child->thread.debug.dbcr0 |= DBCR0_IAC3; if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) dbcr_iac_range(child) |= DBCR_IAC34X; @@ -1126,30 +1126,30 @@ static long set_instruction_bp(struct task_struct *child, */ if (slot2_in_use || (slot3_in_use == slot4_in_use)) { slot = 1; - child->thread.iac1 = bp_info->addr; - child->thread.dbcr0 |= DBCR0_IAC1; + child->thread.debug.iac1 = bp_info->addr; + child->thread.debug.dbcr0 |= DBCR0_IAC1; goto out; } } if (!slot2_in_use) { slot = 2; - child->thread.iac2 = bp_info->addr; - child->thread.dbcr0 |= DBCR0_IAC2; + child->thread.debug.iac2 = bp_info->addr; + child->thread.debug.dbcr0 |= DBCR0_IAC2; #if CONFIG_PPC_ADV_DEBUG_IACS > 2 } else if (!slot3_in_use) { slot = 3; - child->thread.iac3 = bp_info->addr; - child->thread.dbcr0 |= DBCR0_IAC3; + child->thread.debug.iac3 = bp_info->addr; + child->thread.debug.dbcr0 |= DBCR0_IAC3; } else if (!slot4_in_use) { slot = 4; - child->thread.iac4 = bp_info->addr; - child->thread.dbcr0 |= DBCR0_IAC4; + child->thread.debug.iac4 = bp_info->addr; + child->thread.debug.dbcr0 |= DBCR0_IAC4; #endif } else return -ENOSPC; } out: - child->thread.dbcr0 |= DBCR0_IDM; + child->thread.debug.dbcr0 |= DBCR0_IDM; child->thread.regs->msr |= MSR_DE; return slot; @@ -1159,49 +1159,49 @@ static int del_instruction_bp(struct task_struct *child, int slot) { switch (slot) { case 1: - if ((child->thread.dbcr0 & DBCR0_IAC1) == 0) + if ((child->thread.debug.dbcr0 & DBCR0_IAC1) == 0) return -ENOENT; if (dbcr_iac_range(child) & DBCR_IAC12MODE) { /* address range - clear slots 1 & 2 */ - child->thread.iac2 = 0; + child->thread.debug.iac2 = 0; dbcr_iac_range(child) &= ~DBCR_IAC12MODE; } - child->thread.iac1 = 0; - child->thread.dbcr0 &= ~DBCR0_IAC1; + child->thread.debug.iac1 = 0; + child->thread.debug.dbcr0 &= ~DBCR0_IAC1; break; case 2: - if ((child->thread.dbcr0 & DBCR0_IAC2) == 0) + if ((child->thread.debug.dbcr0 & DBCR0_IAC2) == 0) return -ENOENT; if (dbcr_iac_range(child) & DBCR_IAC12MODE) /* used in a range */ return -EINVAL; - child->thread.iac2 = 0; - child->thread.dbcr0 &= ~DBCR0_IAC2; + child->thread.debug.iac2 = 0; + child->thread.debug.dbcr0 &= ~DBCR0_IAC2; break; #if CONFIG_PPC_ADV_DEBUG_IACS > 2 case 3: - if ((child->thread.dbcr0 & DBCR0_IAC3) == 0) + if ((child->thread.debug.dbcr0 & DBCR0_IAC3) == 0) return -ENOENT; if (dbcr_iac_range(child) & DBCR_IAC34MODE) { /* address range - clear slots 3 & 4 */ - child->thread.iac4 = 0; + child->thread.debug.iac4 = 0; dbcr_iac_range(child) &= ~DBCR_IAC34MODE; } - child->thread.iac3 = 0; - child->thread.dbcr0 &= ~DBCR0_IAC3; + child->thread.debug.iac3 = 0; + child->thread.debug.dbcr0 &= ~DBCR0_IAC3; break; case 4: - if ((child->thread.dbcr0 & DBCR0_IAC4) == 0) + if ((child->thread.debug.dbcr0 & DBCR0_IAC4) == 0) return -ENOENT; if (dbcr_iac_range(child) & DBCR_IAC34MODE) /* Used in a range */ return -EINVAL; - child->thread.iac4 = 0; - child->thread.dbcr0 &= ~DBCR0_IAC4; + child->thread.debug.iac4 = 0; + child->thread.debug.dbcr0 &= ~DBCR0_IAC4; break; #endif default: @@ -1231,18 +1231,18 @@ static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) dbcr_dac(child) |= DBCR_DAC1R; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) dbcr_dac(child) |= DBCR_DAC1W; - child->thread.dac1 = (unsigned long)bp_info->addr; + child->thread.debug.dac1 = (unsigned long)bp_info->addr; #if CONFIG_PPC_ADV_DEBUG_DVCS > 0 if (byte_enable) { - child->thread.dvc1 = + child->thread.debug.dvc1 = (unsigned long)bp_info->condition_value; - child->thread.dbcr2 |= + child->thread.debug.dbcr2 |= ((byte_enable << DBCR2_DVC1BE_SHIFT) | (condition_mode << DBCR2_DVC1M_SHIFT)); } #endif #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE - } else if (child->thread.dbcr2 & DBCR2_DAC12MODE) { + } else if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) { /* Both dac1 and dac2 are part of a range */ return -ENOSPC; #endif @@ -1252,19 +1252,19 @@ static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) dbcr_dac(child) |= DBCR_DAC2R; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) dbcr_dac(child) |= DBCR_DAC2W; - child->thread.dac2 = (unsigned long)bp_info->addr; + child->thread.debug.dac2 = (unsigned long)bp_info->addr; #if CONFIG_PPC_ADV_DEBUG_DVCS > 0 if (byte_enable) { - child->thread.dvc2 = + child->thread.debug.dvc2 = (unsigned long)bp_info->condition_value; - child->thread.dbcr2 |= + child->thread.debug.dbcr2 |= ((byte_enable << DBCR2_DVC2BE_SHIFT) | (condition_mode << DBCR2_DVC2M_SHIFT)); } #endif } else return -ENOSPC; - child->thread.dbcr0 |= DBCR0_IDM; + child->thread.debug.dbcr0 |= DBCR0_IDM; child->thread.regs->msr |= MSR_DE; return slot + 4; @@ -1276,32 +1276,32 @@ static int del_dac(struct task_struct *child, int slot) if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) return -ENOENT; - child->thread.dac1 = 0; + child->thread.debug.dac1 = 0; dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W); #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE - if (child->thread.dbcr2 & DBCR2_DAC12MODE) { - child->thread.dac2 = 0; - child->thread.dbcr2 &= ~DBCR2_DAC12MODE; + if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) { + child->thread.debug.dac2 = 0; + child->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE; } - child->thread.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE); + child->thread.debug.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE); #endif #if CONFIG_PPC_ADV_DEBUG_DVCS > 0 - child->thread.dvc1 = 0; + child->thread.debug.dvc1 = 0; #endif } else if (slot == 2) { if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) return -ENOENT; #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE - if (child->thread.dbcr2 & DBCR2_DAC12MODE) + if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) /* Part of a range */ return -EINVAL; - child->thread.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE); + child->thread.debug.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE); #endif #if CONFIG_PPC_ADV_DEBUG_DVCS > 0 - child->thread.dvc2 = 0; + child->thread.debug.dvc2 = 0; #endif - child->thread.dac2 = 0; + child->thread.debug.dac2 = 0; dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W); } else return -EINVAL; @@ -1343,22 +1343,22 @@ static int set_dac_range(struct task_struct *child, return -EIO; } - if (child->thread.dbcr0 & + if (child->thread.debug.dbcr0 & (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W)) return -ENOSPC; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) - child->thread.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM); + child->thread.debug.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM); if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) - child->thread.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM); - child->thread.dac1 = bp_info->addr; - child->thread.dac2 = bp_info->addr2; + child->thread.debug.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM); + child->thread.debug.dac1 = bp_info->addr; + child->thread.debug.dac2 = bp_info->addr2; if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) - child->thread.dbcr2 |= DBCR2_DAC12M; + child->thread.debug.dbcr2 |= DBCR2_DAC12M; else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) - child->thread.dbcr2 |= DBCR2_DAC12MX; + child->thread.debug.dbcr2 |= DBCR2_DAC12MX; else /* PPC_BREAKPOINT_MODE_MASK */ - child->thread.dbcr2 |= DBCR2_DAC12MM; + child->thread.debug.dbcr2 |= DBCR2_DAC12MM; child->thread.regs->msr |= MSR_DE; return 5; @@ -1489,9 +1489,9 @@ static long ppc_del_hwdebug(struct task_struct *child, long data) rc = del_dac(child, (int)data - 4); if (!rc) { - if (!DBCR_ACTIVE_EVENTS(child->thread.dbcr0, - child->thread.dbcr1)) { - child->thread.dbcr0 &= ~DBCR0_IDM; + if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0, + child->thread.debug.dbcr1)) { + child->thread.debug.dbcr0 &= ~DBCR0_IDM; child->thread.regs->msr &= ~MSR_DE; } } @@ -1669,7 +1669,7 @@ long arch_ptrace(struct task_struct *child, long request, if (addr > 0) break; #ifdef CONFIG_PPC_ADV_DEBUG_REGS - ret = put_user(child->thread.dac1, datalp); + ret = put_user(child->thread.debug.dac1, datalp); #else dabr_fake = ((child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) | (child->thread.hw_brk.type & HW_BRK_TYPE_DABR)); diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index f51599e941c7..18c7c65ea46d 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -269,7 +269,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (addr > 0) break; #ifdef CONFIG_PPC_ADV_DEBUG_REGS - ret = put_user(child->thread.dac1, (u32 __user *)data); + ret = put_user(child->thread.debug.dac1, (u32 __user *)data); #else dabr_fake = ( (child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) | diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index bebdf1a1a540..3f220d93c72f 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1309,7 +1309,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx, unsigned char tmp; unsigned long new_msr = regs->msr; #ifdef CONFIG_PPC_ADV_DEBUG_REGS - unsigned long new_dbcr0 = current->thread.dbcr0; + unsigned long new_dbcr0 = current->thread.debug.dbcr0; #endif for (i=0; i<ndbg; i++) { @@ -1324,7 +1324,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx, } else { new_dbcr0 &= ~DBCR0_IC; if (!DBCR_ACTIVE_EVENTS(new_dbcr0, - current->thread.dbcr1)) { + current->thread.debug.dbcr1)) { new_msr &= ~MSR_DE; new_dbcr0 &= ~DBCR0_IDM; } @@ -1359,7 +1359,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx, the user is really doing something wrong. */ regs->msr = new_msr; #ifdef CONFIG_PPC_ADV_DEBUG_REGS - current->thread.dbcr0 = new_dbcr0; + current->thread.debug.dbcr0 = new_dbcr0; #endif if (!access_ok(VERIFY_READ, ctx, sizeof(*ctx)) diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 27a90b99ef67..b4e667663d9b 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -17,6 +17,7 @@ #include <asm/machdep.h> #include <asm/smp.h> #include <asm/pmc.h> +#include <asm/firmware.h> #include "cacheinfo.h" @@ -179,15 +180,25 @@ SYSFS_PMCSETUP(spurr, SPRN_SPURR); SYSFS_PMCSETUP(dscr, SPRN_DSCR); SYSFS_PMCSETUP(pir, SPRN_PIR); +/* + Lets only enable read for phyp resources and + enable write when needed with a separate function. + Lets be conservative and default to pseries. +*/ static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra); static DEVICE_ATTR(spurr, 0400, show_spurr, NULL); static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr); -static DEVICE_ATTR(purr, 0600, show_purr, store_purr); +static DEVICE_ATTR(purr, 0400, show_purr, store_purr); static DEVICE_ATTR(pir, 0400, show_pir, NULL); unsigned long dscr_default = 0; EXPORT_SYMBOL(dscr_default); +static void add_write_permission_dev_attr(struct device_attribute *attr) +{ + attr->attr.mode |= 0200; +} + static ssize_t show_dscr_default(struct device *dev, struct device_attribute *attr, char *buf) { @@ -394,8 +405,11 @@ static void register_cpu_online(unsigned int cpu) if (cpu_has_feature(CPU_FTR_MMCRA)) device_create_file(s, &dev_attr_mmcra); - if (cpu_has_feature(CPU_FTR_PURR)) + if (cpu_has_feature(CPU_FTR_PURR)) { + if (!firmware_has_feature(FW_FEATURE_LPAR)) + add_write_permission_dev_attr(&dev_attr_purr); device_create_file(s, &dev_attr_purr); + } if (cpu_has_feature(CPU_FTR_SPURR)) device_create_file(s, &dev_attr_spurr); diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 7b60b9851469..cd809eaa8b5c 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -79,6 +79,11 @@ _GLOBAL(tm_abort) TABORT(R3) blr + .section ".toc","aw" +DSCR_DEFAULT: + .tc dscr_default[TC],dscr_default + + .section ".text" /* void tm_reclaim(struct thread_struct *thread, * unsigned long orig_msr, @@ -123,6 +128,7 @@ _GLOBAL(tm_reclaim) mr r15, r14 ori r15, r15, MSR_FP li r16, MSR_RI + ori r16, r16, MSR_EE /* IRQs hard off */ andc r15, r15, r16 oris r15, r15, MSR_VEC@h #ifdef CONFIG_VSX @@ -187,11 +193,18 @@ dont_backup_fp: std r1, PACATMSCRATCH(r13) ld r1, PACAR1(r13) + /* Store the PPR in r11 and reset to decent value */ + std r11, GPR11(r1) /* Temporary stash */ + mfspr r11, SPRN_PPR + HMT_MEDIUM + /* Now get some more GPRS free */ std r7, GPR7(r1) /* Temporary stash */ std r12, GPR12(r1) /* '' '' '' */ ld r12, STACK_PARAM(0)(r1) /* Param 0, thread_struct * */ + std r11, THREAD_TM_PPR(r12) /* Store PPR and free r11 */ + addi r7, r12, PT_CKPT_REGS /* Thread's ckpt_regs */ /* Make r7 look like an exception frame so that we @@ -203,15 +216,19 @@ dont_backup_fp: SAVE_GPR(0, r7) /* user r0 */ SAVE_GPR(2, r7) /* user r2 */ SAVE_4GPRS(3, r7) /* user r3-r6 */ - SAVE_4GPRS(8, r7) /* user r8-r11 */ + SAVE_GPR(8, r7) /* user r8 */ + SAVE_GPR(9, r7) /* user r9 */ + SAVE_GPR(10, r7) /* user r10 */ ld r3, PACATMSCRATCH(r13) /* user r1 */ ld r4, GPR7(r1) /* user r7 */ - ld r5, GPR12(r1) /* user r12 */ - GET_SCRATCH0(6) /* user r13 */ + ld r5, GPR11(r1) /* user r11 */ + ld r6, GPR12(r1) /* user r12 */ + GET_SCRATCH0(8) /* user r13 */ std r3, GPR1(r7) std r4, GPR7(r7) - std r5, GPR12(r7) - std r6, GPR13(r7) + std r5, GPR11(r7) + std r6, GPR12(r7) + std r8, GPR13(r7) SAVE_NVGPRS(r7) /* user r14-r31 */ @@ -234,14 +251,12 @@ dont_backup_fp: std r6, _XER(r7) - /* ******************** TAR, PPR, DSCR ********** */ + /* ******************** TAR, DSCR ********** */ mfspr r3, SPRN_TAR - mfspr r4, SPRN_PPR - mfspr r5, SPRN_DSCR + mfspr r4, SPRN_DSCR std r3, THREAD_TM_TAR(r12) - std r4, THREAD_TM_PPR(r12) - std r5, THREAD_TM_DSCR(r12) + std r4, THREAD_TM_DSCR(r12) /* MSR and flags: We don't change CRs, and we don't need to alter * MSR. @@ -258,7 +273,7 @@ dont_backup_fp: std r3, THREAD_TM_TFHAR(r12) std r4, THREAD_TM_TFIAR(r12) - /* AMR and PPR are checkpointed too, but are unsupported by Linux. */ + /* AMR is checkpointed too, but is unsupported by Linux. */ /* Restore original MSR/IRQ state & clear TM mode */ ld r14, TM_FRAME_L0(r1) /* Orig MSR */ @@ -274,6 +289,12 @@ dont_backup_fp: mtcr r4 mtlr r0 ld r2, 40(r1) + + /* Load system default DSCR */ + ld r4, DSCR_DEFAULT@toc(r2) + ld r0, 0(r4) + mtspr SPRN_DSCR, r0 + blr @@ -358,25 +379,24 @@ dont_restore_fp: restore_gprs: - /* ******************** TAR, PPR, DSCR ********** */ - ld r4, THREAD_TM_TAR(r3) - ld r5, THREAD_TM_PPR(r3) - ld r6, THREAD_TM_DSCR(r3) + /* ******************** CR,LR,CCR,MSR ********** */ + ld r4, _CTR(r7) + ld r5, _LINK(r7) + ld r6, _CCR(r7) + ld r8, _XER(r7) - mtspr SPRN_TAR, r4 - mtspr SPRN_PPR, r5 - mtspr SPRN_DSCR, r6 + mtctr r4 + mtlr r5 + mtcr r6 + mtxer r8 - /* ******************** CR,LR,CCR,MSR ********** */ - ld r3, _CTR(r7) - ld r4, _LINK(r7) - ld r5, _CCR(r7) - ld r6, _XER(r7) + /* ******************** TAR ******************** */ + ld r4, THREAD_TM_TAR(r3) + mtspr SPRN_TAR, r4 - mtctr r3 - mtlr r4 - mtcr r5 - mtxer r6 + /* Load up the PPR and DSCR in GPRs only at this stage */ + ld r5, THREAD_TM_DSCR(r3) + ld r6, THREAD_TM_PPR(r3) /* Clear the MSR RI since we are about to change R1. EE is already off */ @@ -384,19 +404,26 @@ restore_gprs: mtmsrd r4, 1 REST_4GPRS(0, r7) /* GPR0-3 */ - REST_GPR(4, r7) /* GPR4-6 */ - REST_GPR(5, r7) - REST_GPR(6, r7) + REST_GPR(4, r7) /* GPR4 */ REST_4GPRS(8, r7) /* GPR8-11 */ REST_2GPRS(12, r7) /* GPR12-13 */ REST_NVGPRS(r7) /* GPR14-31 */ - ld r7, GPR7(r7) /* GPR7 */ + /* Load up PPR and DSCR here so we don't run with user values for long + */ + mtspr SPRN_DSCR, r5 + mtspr SPRN_PPR, r6 + + REST_GPR(5, r7) /* GPR5-7 */ + REST_GPR(6, r7) + ld r7, GPR7(r7) /* Commit register state as checkpointed state: */ TRECHKPT + HMT_MEDIUM + /* Our transactional state has now changed. * * Now just get out of here. Transactional (current) state will be @@ -419,6 +446,12 @@ restore_gprs: mtcr r4 mtlr r0 ld r2, 40(r1) + + /* Load system default DSCR */ + ld r4, DSCR_DEFAULT@toc(r2) + ld r0, 0(r4) + mtspr SPRN_DSCR, r0 + blr /* ****************************************************************** */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index f783c932faeb..4f5df4e7df12 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -351,8 +351,8 @@ static inline int check_io_access(struct pt_regs *regs) #define REASON_TRAP ESR_PTR /* single-step stuff */ -#define single_stepping(regs) (current->thread.dbcr0 & DBCR0_IC) -#define clear_single_step(regs) (current->thread.dbcr0 &= ~DBCR0_IC) +#define single_stepping(regs) (current->thread.debug.dbcr0 & DBCR0_IC) +#define clear_single_step(regs) (current->thread.debug.dbcr0 &= ~DBCR0_IC) #else /* On non-4xx, the reason for the machine check or program @@ -1486,7 +1486,7 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status) if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) { dbcr_dac(current) &= ~(DBCR_DAC1R | DBCR_DAC1W); #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE - current->thread.dbcr2 &= ~DBCR2_DAC12MODE; + current->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE; #endif do_send_trap(regs, mfspr(SPRN_DAC1), debug_status, TRAP_HWBKPT, 5); @@ -1497,24 +1497,24 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status) 6); changed |= 0x01; } else if (debug_status & DBSR_IAC1) { - current->thread.dbcr0 &= ~DBCR0_IAC1; + current->thread.debug.dbcr0 &= ~DBCR0_IAC1; dbcr_iac_range(current) &= ~DBCR_IAC12MODE; do_send_trap(regs, mfspr(SPRN_IAC1), debug_status, TRAP_HWBKPT, 1); changed |= 0x01; } else if (debug_status & DBSR_IAC2) { - current->thread.dbcr0 &= ~DBCR0_IAC2; + current->thread.debug.dbcr0 &= ~DBCR0_IAC2; do_send_trap(regs, mfspr(SPRN_IAC2), debug_status, TRAP_HWBKPT, 2); changed |= 0x01; } else if (debug_status & DBSR_IAC3) { - current->thread.dbcr0 &= ~DBCR0_IAC3; + current->thread.debug.dbcr0 &= ~DBCR0_IAC3; dbcr_iac_range(current) &= ~DBCR_IAC34MODE; do_send_trap(regs, mfspr(SPRN_IAC3), debug_status, TRAP_HWBKPT, 3); changed |= 0x01; } else if (debug_status & DBSR_IAC4) { - current->thread.dbcr0 &= ~DBCR0_IAC4; + current->thread.debug.dbcr0 &= ~DBCR0_IAC4; do_send_trap(regs, mfspr(SPRN_IAC4), debug_status, TRAP_HWBKPT, 4); changed |= 0x01; @@ -1524,19 +1524,20 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status) * Check all other debug flags and see if that bit needs to be turned * back on or not. */ - if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0, current->thread.dbcr1)) + if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0, + current->thread.debug.dbcr1)) regs->msr |= MSR_DE; else /* Make sure the IDM flag is off */ - current->thread.dbcr0 &= ~DBCR0_IDM; + current->thread.debug.dbcr0 &= ~DBCR0_IDM; if (changed & 0x01) - mtspr(SPRN_DBCR0, current->thread.dbcr0); + mtspr(SPRN_DBCR0, current->thread.debug.dbcr0); } void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status) { - current->thread.dbsr = debug_status; + current->thread.debug.dbsr = debug_status; /* Hack alert: On BookE, Branch Taken stops on the branch itself, while * on server, it stops on the target of the branch. In order to simulate @@ -1553,8 +1554,8 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status) /* Do the single step trick only when coming from userspace */ if (user_mode(regs)) { - current->thread.dbcr0 &= ~DBCR0_BT; - current->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC; + current->thread.debug.dbcr0 &= ~DBCR0_BT; + current->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC; regs->msr |= MSR_DE; return; } @@ -1582,13 +1583,13 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status) return; if (user_mode(regs)) { - current->thread.dbcr0 &= ~DBCR0_IC; - if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0, - current->thread.dbcr1)) + current->thread.debug.dbcr0 &= ~DBCR0_IC; + if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0, + current->thread.debug.dbcr1)) regs->msr |= MSR_DE; else /* Make sure the IDM bit is off */ - current->thread.dbcr0 &= ~DBCR0_IDM; + current->thread.debug.dbcr0 &= ~DBCR0_IDM; } _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 78a350670de3..d38cc08b16c7 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1530,11 +1530,15 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, const char *cp; dn = dev->of_node; - if (!dn) - return -ENODEV; + if (!dn) { + strcat(buf, "\n"); + return strlen(buf); + } cp = of_get_property(dn, "compatible", NULL); - if (!cp) - return -ENODEV; + if (!cp) { + strcat(buf, "\n"); + return strlen(buf); + } return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp); } diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 2f5c6b6d6877..93221e87b911 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -31,13 +31,13 @@ #include "44x_tlb.h" #include "booke.h" -void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +static void kvmppc_core_vcpu_load_44x(struct kvm_vcpu *vcpu, int cpu) { kvmppc_booke_vcpu_load(vcpu, cpu); kvmppc_44x_tlb_load(vcpu); } -void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +static void kvmppc_core_vcpu_put_44x(struct kvm_vcpu *vcpu) { kvmppc_44x_tlb_put(vcpu); kvmppc_booke_vcpu_put(vcpu); @@ -114,29 +114,32 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, return 0; } -void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +static int kvmppc_core_get_sregs_44x(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) { - kvmppc_get_sregs_ivor(vcpu, sregs); + return kvmppc_get_sregs_ivor(vcpu, sregs); } -int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +static int kvmppc_core_set_sregs_44x(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) { return kvmppc_set_sregs_ivor(vcpu, sregs); } -int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, - union kvmppc_one_reg *val) +static int kvmppc_get_one_reg_44x(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { return -EINVAL; } -int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, - union kvmppc_one_reg *val) +static int kvmppc_set_one_reg_44x(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { return -EINVAL; } -struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +static struct kvm_vcpu *kvmppc_core_vcpu_create_44x(struct kvm *kvm, + unsigned int id) { struct kvmppc_vcpu_44x *vcpu_44x; struct kvm_vcpu *vcpu; @@ -167,7 +170,7 @@ out: return ERR_PTR(err); } -void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +static void kvmppc_core_vcpu_free_44x(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); @@ -176,28 +179,53 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) kmem_cache_free(kvm_vcpu_cache, vcpu_44x); } -int kvmppc_core_init_vm(struct kvm *kvm) +static int kvmppc_core_init_vm_44x(struct kvm *kvm) { return 0; } -void kvmppc_core_destroy_vm(struct kvm *kvm) +static void kvmppc_core_destroy_vm_44x(struct kvm *kvm) { } +static struct kvmppc_ops kvm_ops_44x = { + .get_sregs = kvmppc_core_get_sregs_44x, + .set_sregs = kvmppc_core_set_sregs_44x, + .get_one_reg = kvmppc_get_one_reg_44x, + .set_one_reg = kvmppc_set_one_reg_44x, + .vcpu_load = kvmppc_core_vcpu_load_44x, + .vcpu_put = kvmppc_core_vcpu_put_44x, + .vcpu_create = kvmppc_core_vcpu_create_44x, + .vcpu_free = kvmppc_core_vcpu_free_44x, + .mmu_destroy = kvmppc_mmu_destroy_44x, + .init_vm = kvmppc_core_init_vm_44x, + .destroy_vm = kvmppc_core_destroy_vm_44x, + .emulate_op = kvmppc_core_emulate_op_44x, + .emulate_mtspr = kvmppc_core_emulate_mtspr_44x, + .emulate_mfspr = kvmppc_core_emulate_mfspr_44x, +}; + static int __init kvmppc_44x_init(void) { int r; r = kvmppc_booke_init(); if (r) - return r; + goto err_out; + + r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE); + if (r) + goto err_out; + kvm_ops_44x.owner = THIS_MODULE; + kvmppc_pr_ops = &kvm_ops_44x; - return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE); +err_out: + return r; } static void __exit kvmppc_44x_exit(void) { + kvmppc_pr_ops = NULL; kvmppc_booke_exit(); } diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c index 35ec0a8547da..92c9ab4bcfec 100644 --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c @@ -91,8 +91,8 @@ static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn) return EMULATE_DONE; } -int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int inst, int *advance) +int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance) { int emulated = EMULATE_DONE; int dcrn = get_dcrn(inst); @@ -152,7 +152,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, return emulated; } -int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) +int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) { int emulated = EMULATE_DONE; @@ -172,7 +172,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) return emulated; } -int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) +int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) { int emulated = EMULATE_DONE; diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index ed0385448148..0deef1082e02 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -268,7 +268,7 @@ static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x, trace_kvm_stlb_inval(stlb_index); } -void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) +void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); int i; diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index ffaef2cb101a..8aeeda1ff42a 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -34,17 +34,20 @@ config KVM_BOOK3S_64_HANDLER bool select KVM_BOOK3S_HANDLER -config KVM_BOOK3S_PR +config KVM_BOOK3S_PR_POSSIBLE bool select KVM_MMIO select MMU_NOTIFIER +config KVM_BOOK3S_HV_POSSIBLE + bool + config KVM_BOOK3S_32 tristate "KVM support for PowerPC book3s_32 processors" depends on PPC_BOOK3S_32 && !SMP && !PTE_64BIT select KVM select KVM_BOOK3S_32_HANDLER - select KVM_BOOK3S_PR + select KVM_BOOK3S_PR_POSSIBLE ---help--- Support running unmodified book3s_32 guest kernels in virtual machines on book3s_32 host processors. @@ -59,6 +62,7 @@ config KVM_BOOK3S_64 depends on PPC_BOOK3S_64 select KVM_BOOK3S_64_HANDLER select KVM + select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE ---help--- Support running unmodified book3s_64 and book3s_32 guest kernels in virtual machines on book3s_64 host processors. @@ -69,8 +73,9 @@ config KVM_BOOK3S_64 If unsure, say N. config KVM_BOOK3S_64_HV - bool "KVM support for POWER7 and PPC970 using hypervisor mode in host" + tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host" depends on KVM_BOOK3S_64 + select KVM_BOOK3S_HV_POSSIBLE select MMU_NOTIFIER select CMA ---help--- @@ -89,9 +94,20 @@ config KVM_BOOK3S_64_HV If unsure, say N. config KVM_BOOK3S_64_PR - def_bool y - depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV - select KVM_BOOK3S_PR + tristate "KVM support without using hypervisor mode in host" + depends on KVM_BOOK3S_64 + select KVM_BOOK3S_PR_POSSIBLE + ---help--- + Support running guest kernels in virtual machines on processors + without using hypervisor mode in the host, by running the + guest in user mode (problem state) and emulating all + privileged instructions and registers. + + This is not as fast as using hypervisor mode, but works on + machines where hypervisor mode is not available or not usable, + and can emulate processors that are different from the host + processor, including emulating 32-bit processors on a 64-bit + host. config KVM_BOOKE_HV bool diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 6646c952c5e3..ce569b6bf4d8 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -53,41 +53,51 @@ kvm-e500mc-objs := \ e500_emulate.o kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs) -kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ - $(KVM)/coalesced_mmio.o \ +kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) := \ + book3s_64_vio_hv.o + +kvm-pr-y := \ fpu.o \ book3s_paired_singles.o \ book3s_pr.o \ book3s_pr_papr.o \ - book3s_64_vio_hv.o \ book3s_emulate.o \ book3s_interrupts.o \ book3s_mmu_hpte.o \ book3s_64_mmu_host.o \ book3s_64_mmu.o \ book3s_32_mmu.o -kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ + +ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE +kvm-book3s_64-module-objs := \ + $(KVM)/coalesced_mmio.o + +kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ book3s_rmhandlers.o +endif -kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ +kvm-hv-y += \ book3s_hv.o \ book3s_hv_interrupts.o \ book3s_64_mmu_hv.o + kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ book3s_hv_rm_xics.o -kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ + +ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ book3s_hv_rmhandlers.o \ book3s_hv_rm_mmu.o \ - book3s_64_vio_hv.o \ book3s_hv_ras.o \ book3s_hv_builtin.o \ book3s_hv_cma.o \ $(kvm-book3s_64-builtin-xics-objs-y) +endif kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ book3s_xics.o -kvm-book3s_64-module-objs := \ +kvm-book3s_64-module-objs += \ $(KVM)/kvm_main.o \ $(KVM)/eventfd.o \ powerpc.o \ @@ -123,4 +133,7 @@ obj-$(CONFIG_KVM_E500MC) += kvm.o obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o +obj-$(CONFIG_KVM_BOOK3S_64_PR) += kvm-pr.o +obj-$(CONFIG_KVM_BOOK3S_64_HV) += kvm-hv.o + obj-y += $(kvm-book3s_64-builtin-objs-y) diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 700df6f1d32c..8912608b7e1b 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -34,6 +34,7 @@ #include <linux/vmalloc.h> #include <linux/highmem.h> +#include "book3s.h" #include "trace.h" #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU @@ -69,6 +70,50 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) { } +static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) +{ + if (!is_kvmppc_hv_enabled(vcpu->kvm)) + return to_book3s(vcpu)->hior; + return 0; +} + +static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu, + unsigned long pending_now, unsigned long old_pending) +{ + if (is_kvmppc_hv_enabled(vcpu->kvm)) + return; + if (pending_now) + vcpu->arch.shared->int_pending = 1; + else if (old_pending) + vcpu->arch.shared->int_pending = 0; +} + +static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) +{ + ulong crit_raw; + ulong crit_r1; + bool crit; + + if (is_kvmppc_hv_enabled(vcpu->kvm)) + return false; + + crit_raw = vcpu->arch.shared->critical; + crit_r1 = kvmppc_get_gpr(vcpu, 1); + + /* Truncate crit indicators in 32 bit mode */ + if (!(vcpu->arch.shared->msr & MSR_SF)) { + crit_raw &= 0xffffffff; + crit_r1 &= 0xffffffff; + } + + /* Critical section when crit == r1 */ + crit = (crit_raw == crit_r1); + /* ... and we're in supervisor mode */ + crit = crit && !(vcpu->arch.shared->msr & MSR_PR); + + return crit; +} + void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) { vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu); @@ -126,28 +171,32 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) printk(KERN_INFO "Queueing interrupt %x\n", vec); #endif } - +EXPORT_SYMBOL_GPL(kvmppc_book3s_queue_irqprio); void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags) { /* might as well deliver this straight away */ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_PROGRAM, flags); } +EXPORT_SYMBOL_GPL(kvmppc_core_queue_program); void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) { kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER); } +EXPORT_SYMBOL_GPL(kvmppc_core_queue_dec); int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu) { return test_bit(BOOK3S_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); } +EXPORT_SYMBOL_GPL(kvmppc_core_pending_dec); void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu) { kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER); } +EXPORT_SYMBOL_GPL(kvmppc_core_dequeue_dec); void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) @@ -285,8 +334,10 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) return 0; } +EXPORT_SYMBOL_GPL(kvmppc_core_prepare_to_enter); -pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn) +pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, + bool *writable) { ulong mp_pa = vcpu->arch.magic_page_pa; @@ -302,20 +353,23 @@ pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn) pfn = (pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT; get_page(pfn_to_page(pfn)); + if (writable) + *writable = true; return pfn; } - return gfn_to_pfn(vcpu->kvm, gfn); + return gfn_to_pfn_prot(vcpu->kvm, gfn, writing, writable); } +EXPORT_SYMBOL_GPL(kvmppc_gfn_to_pfn); static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, - struct kvmppc_pte *pte) + bool iswrite, struct kvmppc_pte *pte) { int relocated = (vcpu->arch.shared->msr & (data ? MSR_DR : MSR_IR)); int r; if (relocated) { - r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data); + r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data, iswrite); } else { pte->eaddr = eaddr; pte->raddr = eaddr & KVM_PAM; @@ -361,7 +415,7 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, vcpu->stat.st++; - if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) + if (kvmppc_xlate(vcpu, *eaddr, data, true, &pte)) return -ENOENT; *eaddr = pte.raddr; @@ -374,6 +428,7 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, return EMULATE_DONE; } +EXPORT_SYMBOL_GPL(kvmppc_st); int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data) @@ -383,7 +438,7 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, vcpu->stat.ld++; - if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) + if (kvmppc_xlate(vcpu, *eaddr, data, false, &pte)) goto nopte; *eaddr = pte.raddr; @@ -404,6 +459,7 @@ nopte: mmio: return EMULATE_DO_MMIO; } +EXPORT_SYMBOL_GPL(kvmppc_ld); int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { @@ -419,6 +475,18 @@ void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu) { } +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs); +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); +} + int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; @@ -495,8 +563,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) if (size > sizeof(val)) return -EINVAL; - r = kvmppc_get_one_reg(vcpu, reg->id, &val); - + r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val); if (r == -EINVAL) { r = 0; switch (reg->id) { @@ -528,6 +595,9 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) } val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]); break; + case KVM_REG_PPC_VRSAVE: + val = get_reg_val(reg->id, vcpu->arch.vrsave); + break; #endif /* CONFIG_ALTIVEC */ case KVM_REG_PPC_DEBUG_INST: { u32 opcode = INS_TW; @@ -572,8 +642,7 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) return -EFAULT; - r = kvmppc_set_one_reg(vcpu, reg->id, &val); - + r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val); if (r == -EINVAL) { r = 0; switch (reg->id) { @@ -605,6 +674,13 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) } vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val); break; + case KVM_REG_PPC_VRSAVE: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + vcpu->arch.vrsave = set_reg_val(reg->id, val); + break; #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_KVM_XICS case KVM_REG_PPC_ICP_STATE: @@ -625,6 +701,27 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) return r; } +void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu); +} + +void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +{ + vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu); +} + +void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) +{ + vcpu->kvm->arch.kvm_ops->set_msr(vcpu, msr); +} +EXPORT_SYMBOL_GPL(kvmppc_set_msr); + +int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +{ + return vcpu->kvm->arch.kvm_ops->vcpu_run(kvm_run, vcpu); +} + int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr) { @@ -644,3 +741,141 @@ void kvmppc_decrementer_func(unsigned long data) kvmppc_core_queue_dec(vcpu); kvm_vcpu_kick(vcpu); } + +struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +{ + return kvm->arch.kvm_ops->vcpu_create(kvm, id); +} + +void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +{ + vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu); +} + +int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) +{ + return vcpu->kvm->arch.kvm_ops->check_requests(vcpu); +} + +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) +{ + return kvm->arch.kvm_ops->get_dirty_log(kvm, log); +} + +void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ + kvm->arch.kvm_ops->free_memslot(free, dont); +} + +int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) +{ + return kvm->arch.kvm_ops->create_memslot(slot, npages); +} + +void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) +{ + kvm->arch.kvm_ops->flush_memslot(kvm, memslot); +} + +int kvmppc_core_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + struct kvm_userspace_memory_region *mem) +{ + return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem); +} + +void kvmppc_core_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old) +{ + kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old); +} + +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +{ + return kvm->arch.kvm_ops->unmap_hva(kvm, hva); +} +EXPORT_SYMBOL_GPL(kvm_unmap_hva); + +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +{ + return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end); +} + +int kvm_age_hva(struct kvm *kvm, unsigned long hva) +{ + return kvm->arch.kvm_ops->age_hva(kvm, hva); +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + return kvm->arch.kvm_ops->test_age_hva(kvm, hva); +} + +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ + kvm->arch.kvm_ops->set_spte_hva(kvm, hva, pte); +} + +void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) +{ + vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu); +} + +int kvmppc_core_init_vm(struct kvm *kvm) +{ + +#ifdef CONFIG_PPC64 + INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); + INIT_LIST_HEAD(&kvm->arch.rtas_tokens); +#endif + + return kvm->arch.kvm_ops->init_vm(kvm); +} + +void kvmppc_core_destroy_vm(struct kvm *kvm) +{ + kvm->arch.kvm_ops->destroy_vm(kvm); + +#ifdef CONFIG_PPC64 + kvmppc_rtas_tokens_free(kvm); + WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); +#endif +} + +int kvmppc_core_check_processor_compat(void) +{ + /* + * We always return 0 for book3s. We check + * for compatability while loading the HV + * or PR module + */ + return 0; +} + +static int kvmppc_book3s_init(void) +{ + int r; + + r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); + if (r) + return r; +#ifdef CONFIG_KVM_BOOK3S_32 + r = kvmppc_book3s_init_pr(); +#endif + return r; + +} + +static void kvmppc_book3s_exit(void) +{ +#ifdef CONFIG_KVM_BOOK3S_32 + kvmppc_book3s_exit_pr(); +#endif + kvm_exit(); +} + +module_init(kvmppc_book3s_init); +module_exit(kvmppc_book3s_exit); diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h new file mode 100644 index 000000000000..4bf956cf94d6 --- /dev/null +++ b/arch/powerpc/kvm/book3s.h @@ -0,0 +1,34 @@ +/* + * Copyright IBM Corporation, 2013 + * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License or (at your optional) any later version of the license. + * + */ + +#ifndef __POWERPC_KVM_BOOK3S_H__ +#define __POWERPC_KVM_BOOK3S_H__ + +extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm, + struct kvm_memory_slot *memslot); +extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva); +extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, + unsigned long end); +extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva); +extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva); +extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte); + +extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu); +extern int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance); +extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, + int sprn, ulong spr_val); +extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, + int sprn, ulong *spr_val); +extern int kvmppc_book3s_init_pr(void); +extern void kvmppc_book3s_exit_pr(void); + +#endif diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index c8cefdd15fd8..76a64ce6a5b6 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -84,7 +84,8 @@ static inline bool sr_nx(u32 sr_raw) } static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, - struct kvmppc_pte *pte, bool data); + struct kvmppc_pte *pte, bool data, + bool iswrite); static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid); @@ -99,7 +100,7 @@ static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, u64 vsid; struct kvmppc_pte pte; - if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data)) + if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data, false)) return pte.vpage; kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); @@ -111,10 +112,11 @@ static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu) kvmppc_set_msr(vcpu, 0); } -static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3s, +static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu, u32 sre, gva_t eaddr, bool primary) { + struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); u32 page, hash, pteg, htabmask; hva_t r; @@ -132,7 +134,7 @@ static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3 kvmppc_get_pc(&vcpu_book3s->vcpu), eaddr, vcpu_book3s->sdr1, pteg, sr_vsid(sre)); - r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT); + r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT); if (kvm_is_error_hva(r)) return r; return r | (pteg & ~PAGE_MASK); @@ -145,7 +147,8 @@ static u32 kvmppc_mmu_book3s_32_get_ptem(u32 sre, gva_t eaddr, bool primary) } static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, - struct kvmppc_pte *pte, bool data) + struct kvmppc_pte *pte, bool data, + bool iswrite) { struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); struct kvmppc_bat *bat; @@ -186,8 +189,7 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, printk(KERN_INFO "BAT is not readable!\n"); continue; } - if (!pte->may_write) { - /* let's treat r/o BATs as not-readable for now */ + if (iswrite && !pte->may_write) { dprintk_pte("BAT is read-only!\n"); continue; } @@ -201,9 +203,8 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data, - bool primary) + bool iswrite, bool primary) { - struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); u32 sre; hva_t ptegp; u32 pteg[16]; @@ -218,7 +219,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data); - ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu_book3s, sre, eaddr, primary); + ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu, sre, eaddr, primary); if (kvm_is_error_hva(ptegp)) { printk(KERN_INFO "KVM: Invalid PTEG!\n"); goto no_page_found; @@ -258,9 +259,6 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, break; } - if ( !pte->may_read ) - continue; - dprintk_pte("MMU: Found PTE -> %x %x - %x\n", pteg[i], pteg[i+1], pp); found = 1; @@ -271,19 +269,23 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, /* Update PTE C and A bits, so the guest's swapper knows we used the page */ if (found) { - u32 oldpte = pteg[i+1]; - - if (pte->may_read) - pteg[i+1] |= PTEG_FLAG_ACCESSED; - if (pte->may_write) - pteg[i+1] |= PTEG_FLAG_DIRTY; - else - dprintk_pte("KVM: Mapping read-only page!\n"); - - /* Write back into the PTEG */ - if (pteg[i+1] != oldpte) - copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); - + u32 pte_r = pteg[i+1]; + char __user *addr = (char __user *) &pteg[i+1]; + + /* + * Use single-byte writes to update the HPTE, to + * conform to what real hardware does. + */ + if (pte->may_read && !(pte_r & PTEG_FLAG_ACCESSED)) { + pte_r |= PTEG_FLAG_ACCESSED; + put_user(pte_r >> 8, addr + 2); + } + if (iswrite && pte->may_write && !(pte_r & PTEG_FLAG_DIRTY)) { + pte_r |= PTEG_FLAG_DIRTY; + put_user(pte_r, addr + 3); + } + if (!pte->may_read || (iswrite && !pte->may_write)) + return -EPERM; return 0; } @@ -302,12 +304,14 @@ no_page_found: } static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, - struct kvmppc_pte *pte, bool data) + struct kvmppc_pte *pte, bool data, + bool iswrite) { int r; ulong mp_ea = vcpu->arch.magic_page_ea; pte->eaddr = eaddr; + pte->page_size = MMU_PAGE_4K; /* Magic page override */ if (unlikely(mp_ea) && @@ -323,11 +327,13 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, return 0; } - r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data); + r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data, iswrite); if (r < 0) - r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, true); + r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, + data, iswrite, true); if (r < 0) - r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, false); + r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, + data, iswrite, false); return r; } @@ -347,7 +353,12 @@ static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum, static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large) { - kvmppc_mmu_pte_flush(vcpu, ea, 0x0FFFF000); + int i; + struct kvm_vcpu *v; + + /* flush this VA on all cpus */ + kvm_for_each_vcpu(i, v, vcpu->kvm) + kvmppc_mmu_pte_flush(v, ea, 0x0FFFF000); } static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 00e619bf608e..3a0abd2e5a15 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -138,7 +138,8 @@ static u32 *kvmppc_mmu_get_pteg(struct kvm_vcpu *vcpu, u32 vsid, u32 eaddr, extern char etext[]; -int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) +int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, + bool iswrite) { pfn_t hpaddr; u64 vpn; @@ -152,9 +153,11 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) bool evict = false; struct hpte_cache *pte; int r = 0; + bool writable; /* Get host physical address for gpa */ - hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); + hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT, + iswrite, &writable); if (is_error_noslot_pfn(hpaddr)) { printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); @@ -204,7 +207,7 @@ next_pteg: (primary ? 0 : PTE_SEC); pteg1 = hpaddr | PTE_M | PTE_R | PTE_C; - if (orig_pte->may_write) { + if (orig_pte->may_write && writable) { pteg1 |= PP_RWRW; mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); } else { @@ -259,6 +262,11 @@ out: return r; } +void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) +{ + kvmppc_mmu_pte_vflush(vcpu, pte->vpage, 0xfffffffffULL); +} + static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) { struct kvmppc_sid_map *map; @@ -341,7 +349,7 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) svcpu_put(svcpu); } -void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) +void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu) { int i; diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 7e345e00661a..83da1f868fd5 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -107,9 +107,20 @@ static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, return kvmppc_slb_calc_vpn(slb, eaddr); } +static int mmu_pagesize(int mmu_pg) +{ + switch (mmu_pg) { + case MMU_PAGE_64K: + return 16; + case MMU_PAGE_16M: + return 24; + } + return 12; +} + static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe) { - return slbe->large ? 24 : 12; + return mmu_pagesize(slbe->base_page_size); } static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr) @@ -119,11 +130,11 @@ static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr) return ((eaddr & kvmppc_slb_offset_mask(slbe)) >> p); } -static hva_t kvmppc_mmu_book3s_64_get_pteg( - struct kvmppc_vcpu_book3s *vcpu_book3s, +static hva_t kvmppc_mmu_book3s_64_get_pteg(struct kvm_vcpu *vcpu, struct kvmppc_slb *slbe, gva_t eaddr, bool second) { + struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); u64 hash, pteg, htabsize; u32 ssize; hva_t r; @@ -148,10 +159,10 @@ static hva_t kvmppc_mmu_book3s_64_get_pteg( /* When running a PAPR guest, SDR1 contains a HVA address instead of a GPA */ - if (vcpu_book3s->vcpu.arch.papr_enabled) + if (vcpu->arch.papr_enabled) r = pteg; else - r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT); + r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT); if (kvm_is_error_hva(r)) return r; @@ -166,18 +177,38 @@ static u64 kvmppc_mmu_book3s_64_get_avpn(struct kvmppc_slb *slbe, gva_t eaddr) avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr); avpn |= slbe->vsid << (kvmppc_slb_sid_shift(slbe) - p); - if (p < 24) - avpn >>= ((80 - p) - 56) - 8; + if (p < 16) + avpn >>= ((80 - p) - 56) - 8; /* 16 - p */ else - avpn <<= 8; + avpn <<= p - 16; return avpn; } +/* + * Return page size encoded in the second word of a HPTE, or + * -1 for an invalid encoding for the base page size indicated by + * the SLB entry. This doesn't handle mixed pagesize segments yet. + */ +static int decode_pagesize(struct kvmppc_slb *slbe, u64 r) +{ + switch (slbe->base_page_size) { + case MMU_PAGE_64K: + if ((r & 0xf000) == 0x1000) + return MMU_PAGE_64K; + break; + case MMU_PAGE_16M: + if ((r & 0xff000) == 0) + return MMU_PAGE_16M; + break; + } + return -1; +} + static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, - struct kvmppc_pte *gpte, bool data) + struct kvmppc_pte *gpte, bool data, + bool iswrite) { - struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); struct kvmppc_slb *slbe; hva_t ptegp; u64 pteg[16]; @@ -189,6 +220,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, u8 pp, key = 0; bool found = false; bool second = false; + int pgsize; ulong mp_ea = vcpu->arch.magic_page_ea; /* Magic page override */ @@ -202,6 +234,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, gpte->may_execute = true; gpte->may_read = true; gpte->may_write = true; + gpte->page_size = MMU_PAGE_4K; return 0; } @@ -222,8 +255,12 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID | HPTE_V_SECONDARY; + pgsize = slbe->large ? MMU_PAGE_16M : MMU_PAGE_4K; + + mutex_lock(&vcpu->kvm->arch.hpt_mutex); + do_second: - ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second); + ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu, slbe, eaddr, second); if (kvm_is_error_hva(ptegp)) goto no_page_found; @@ -240,6 +277,13 @@ do_second: for (i=0; i<16; i+=2) { /* Check all relevant fields of 1st dword */ if ((pteg[i] & v_mask) == v_val) { + /* If large page bit is set, check pgsize encoding */ + if (slbe->large && + (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) { + pgsize = decode_pagesize(slbe, pteg[i+1]); + if (pgsize < 0) + continue; + } found = true; break; } @@ -256,13 +300,15 @@ do_second: v = pteg[i]; r = pteg[i+1]; pp = (r & HPTE_R_PP) | key; - eaddr_mask = 0xFFF; + if (r & HPTE_R_PP0) + pp |= 8; gpte->eaddr = eaddr; gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data); - if (slbe->large) - eaddr_mask = 0xFFFFFF; + + eaddr_mask = (1ull << mmu_pagesize(pgsize)) - 1; gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask); + gpte->page_size = pgsize; gpte->may_execute = ((r & HPTE_R_N) ? false : true); gpte->may_read = false; gpte->may_write = false; @@ -277,6 +323,7 @@ do_second: case 3: case 5: case 7: + case 10: gpte->may_read = true; break; } @@ -287,30 +334,37 @@ do_second: /* Update PTE R and C bits, so the guest's swapper knows we used the * page */ - if (gpte->may_read) { - /* Set the accessed flag */ + if (gpte->may_read && !(r & HPTE_R_R)) { + /* + * Set the accessed flag. + * We have to write this back with a single byte write + * because another vcpu may be accessing this on + * non-PAPR platforms such as mac99, and this is + * what real hardware does. + */ + char __user *addr = (char __user *) &pteg[i+1]; r |= HPTE_R_R; + put_user(r >> 8, addr + 6); } - if (data && gpte->may_write) { - /* Set the dirty flag -- XXX even if not writing */ + if (iswrite && gpte->may_write && !(r & HPTE_R_C)) { + /* Set the dirty flag */ + /* Use a single byte write */ + char __user *addr = (char __user *) &pteg[i+1]; r |= HPTE_R_C; + put_user(r, addr + 7); } - /* Write back into the PTEG */ - if (pteg[i+1] != r) { - pteg[i+1] = r; - copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); - } + mutex_unlock(&vcpu->kvm->arch.hpt_mutex); - if (!gpte->may_read) + if (!gpte->may_read || (iswrite && !gpte->may_write)) return -EPERM; return 0; no_page_found: + mutex_unlock(&vcpu->kvm->arch.hpt_mutex); return -ENOENT; no_seg_found: - dprintk("KVM MMU: Trigger segment fault\n"); return -EINVAL; } @@ -345,6 +399,21 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb) slbe->nx = (rs & SLB_VSID_N) ? 1 : 0; slbe->class = (rs & SLB_VSID_C) ? 1 : 0; + slbe->base_page_size = MMU_PAGE_4K; + if (slbe->large) { + if (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE) { + switch (rs & SLB_VSID_LP) { + case SLB_VSID_LP_00: + slbe->base_page_size = MMU_PAGE_16M; + break; + case SLB_VSID_LP_01: + slbe->base_page_size = MMU_PAGE_64K; + break; + } + } else + slbe->base_page_size = MMU_PAGE_16M; + } + slbe->orige = rb & (ESID_MASK | SLB_ESID_V); slbe->origv = rs; @@ -460,14 +529,45 @@ static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va, bool large) { u64 mask = 0xFFFFFFFFFULL; + long i; + struct kvm_vcpu *v; dprintk("KVM MMU: tlbie(0x%lx)\n", va); - if (large) - mask = 0xFFFFFF000ULL; - kvmppc_mmu_pte_vflush(vcpu, va >> 12, mask); + /* + * The tlbie instruction changed behaviour starting with + * POWER6. POWER6 and later don't have the large page flag + * in the instruction but in the RB value, along with bits + * indicating page and segment sizes. + */ + if (vcpu->arch.hflags & BOOK3S_HFLAG_NEW_TLBIE) { + /* POWER6 or later */ + if (va & 1) { /* L bit */ + if ((va & 0xf000) == 0x1000) + mask = 0xFFFFFFFF0ULL; /* 64k page */ + else + mask = 0xFFFFFF000ULL; /* 16M page */ + } + } else { + /* older processors, e.g. PPC970 */ + if (large) + mask = 0xFFFFFF000ULL; + } + /* flush this VA on all vcpus */ + kvm_for_each_vcpu(i, v, vcpu->kvm) + kvmppc_mmu_pte_vflush(v, va >> 12, mask); } +#ifdef CONFIG_PPC_64K_PAGES +static int segment_contains_magic_page(struct kvm_vcpu *vcpu, ulong esid) +{ + ulong mp_ea = vcpu->arch.magic_page_ea; + + return mp_ea && !(vcpu->arch.shared->msr & MSR_PR) && + (mp_ea >> SID_SHIFT) == esid; +} +#endif + static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid) { @@ -475,11 +575,13 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, struct kvmppc_slb *slb; u64 gvsid = esid; ulong mp_ea = vcpu->arch.magic_page_ea; + int pagesize = MMU_PAGE_64K; if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea); if (slb) { gvsid = slb->vsid; + pagesize = slb->base_page_size; if (slb->tb) { gvsid <<= SID_SHIFT_1T - SID_SHIFT; gvsid |= esid & ((1ul << (SID_SHIFT_1T - SID_SHIFT)) - 1); @@ -490,28 +592,41 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { case 0: - *vsid = VSID_REAL | esid; + gvsid = VSID_REAL | esid; break; case MSR_IR: - *vsid = VSID_REAL_IR | gvsid; + gvsid |= VSID_REAL_IR; break; case MSR_DR: - *vsid = VSID_REAL_DR | gvsid; + gvsid |= VSID_REAL_DR; break; case MSR_DR|MSR_IR: if (!slb) goto no_slb; - *vsid = gvsid; break; default: BUG(); break; } +#ifdef CONFIG_PPC_64K_PAGES + /* + * Mark this as a 64k segment if the host is using + * 64k pages, the host MMU supports 64k pages and + * the guest segment page size is >= 64k, + * but not if this segment contains the magic page. + */ + if (pagesize >= MMU_PAGE_64K && + mmu_psize_defs[MMU_PAGE_64K].shift && + !segment_contains_magic_page(vcpu, esid)) + gvsid |= VSID_64K; +#endif + if (vcpu->arch.shared->msr & MSR_PR) - *vsid |= VSID_PR; + gvsid |= VSID_PR; + *vsid = gvsid; return 0; no_slb: diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index e5240524bf6c..0d513af62bba 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -27,14 +27,14 @@ #include <asm/machdep.h> #include <asm/mmu_context.h> #include <asm/hw_irq.h> -#include "trace.h" +#include "trace_pr.h" #define PTE_SIZE 12 void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { ppc_md.hpte_invalidate(pte->slot, pte->host_vpn, - MMU_PAGE_4K, MMU_PAGE_4K, MMU_SEGSIZE_256M, + pte->pagesize, pte->pagesize, MMU_SEGSIZE_256M, false); } @@ -78,7 +78,8 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) return NULL; } -int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) +int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, + bool iswrite) { unsigned long vpn; pfn_t hpaddr; @@ -90,16 +91,26 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) int attempt = 0; struct kvmppc_sid_map *map; int r = 0; + int hpsize = MMU_PAGE_4K; + bool writable; + unsigned long mmu_seq; + struct kvm *kvm = vcpu->kvm; + struct hpte_cache *cpte; + unsigned long gfn = orig_pte->raddr >> PAGE_SHIFT; + unsigned long pfn; + + /* used to check for invalidations in progress */ + mmu_seq = kvm->mmu_notifier_seq; + smp_rmb(); /* Get host physical address for gpa */ - hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); - if (is_error_noslot_pfn(hpaddr)) { - printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); + pfn = kvmppc_gfn_to_pfn(vcpu, gfn, iswrite, &writable); + if (is_error_noslot_pfn(pfn)) { + printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", gfn); r = -EINVAL; goto out; } - hpaddr <<= PAGE_SHIFT; - hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK); + hpaddr = pfn << PAGE_SHIFT; /* and write the mapping ea -> hpa into the pt */ vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid); @@ -117,20 +128,39 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) goto out; } - vsid = map->host_vsid; - vpn = hpt_vpn(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M); + vpn = hpt_vpn(orig_pte->eaddr, map->host_vsid, MMU_SEGSIZE_256M); - if (!orig_pte->may_write) - rflags |= HPTE_R_PP; - else - mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); + kvm_set_pfn_accessed(pfn); + if (!orig_pte->may_write || !writable) + rflags |= PP_RXRX; + else { + mark_page_dirty(vcpu->kvm, gfn); + kvm_set_pfn_dirty(pfn); + } if (!orig_pte->may_execute) rflags |= HPTE_R_N; else - kvmppc_mmu_flush_icache(hpaddr >> PAGE_SHIFT); + kvmppc_mmu_flush_icache(pfn); + + /* + * Use 64K pages if possible; otherwise, on 64K page kernels, + * we need to transfer 4 more bits from guest real to host real addr. + */ + if (vsid & VSID_64K) + hpsize = MMU_PAGE_64K; + else + hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK); + + hash = hpt_hash(vpn, mmu_psize_defs[hpsize].shift, MMU_SEGSIZE_256M); - hash = hpt_hash(vpn, PTE_SIZE, MMU_SEGSIZE_256M); + cpte = kvmppc_mmu_hpte_cache_next(vcpu); + + spin_lock(&kvm->mmu_lock); + if (!cpte || mmu_notifier_retry(kvm, mmu_seq)) { + r = -EAGAIN; + goto out_unlock; + } map_again: hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); @@ -139,11 +169,11 @@ map_again: if (attempt > 1) if (ppc_md.hpte_remove(hpteg) < 0) { r = -1; - goto out; + goto out_unlock; } ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags, - MMU_PAGE_4K, MMU_PAGE_4K, MMU_SEGSIZE_256M); + hpsize, hpsize, MMU_SEGSIZE_256M); if (ret < 0) { /* If we couldn't map a primary PTE, try a secondary */ @@ -152,8 +182,6 @@ map_again: attempt++; goto map_again; } else { - struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu); - trace_kvm_book3s_64_mmu_map(rflags, hpteg, vpn, hpaddr, orig_pte); @@ -164,19 +192,37 @@ map_again: hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); } - pte->slot = hpteg + (ret & 7); - pte->host_vpn = vpn; - pte->pte = *orig_pte; - pte->pfn = hpaddr >> PAGE_SHIFT; + cpte->slot = hpteg + (ret & 7); + cpte->host_vpn = vpn; + cpte->pte = *orig_pte; + cpte->pfn = pfn; + cpte->pagesize = hpsize; - kvmppc_mmu_hpte_cache_map(vcpu, pte); + kvmppc_mmu_hpte_cache_map(vcpu, cpte); + cpte = NULL; } - kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT); + +out_unlock: + spin_unlock(&kvm->mmu_lock); + kvm_release_pfn_clean(pfn); + if (cpte) + kvmppc_mmu_hpte_cache_free(cpte); out: return r; } +void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) +{ + u64 mask = 0xfffffffffULL; + u64 vsid; + + vcpu->arch.mmu.esid_to_vsid(vcpu, pte->eaddr >> SID_SHIFT, &vsid); + if (vsid & VSID_64K) + mask = 0xffffffff0ULL; + kvmppc_mmu_pte_vflush(vcpu, pte->vpage, mask); +} + static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) { struct kvmppc_sid_map *map; @@ -291,6 +337,12 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr) slb_vsid &= ~SLB_VSID_KP; slb_esid |= slb_index; +#ifdef CONFIG_PPC_64K_PAGES + /* Set host segment base page size to 64K if possible */ + if (gvsid & VSID_64K) + slb_vsid |= mmu_psize_defs[MMU_PAGE_64K].sllp; +#endif + svcpu->slb[slb_index].esid = slb_esid; svcpu->slb[slb_index].vsid = slb_vsid; @@ -326,7 +378,7 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) svcpu_put(svcpu); } -void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) +void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu) { kvmppc_mmu_hpte_destroy(vcpu); __destroy_context(to_book3s(vcpu)->context_id[0]); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 043eec8461e7..f3ff587a8b7d 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -260,10 +260,6 @@ int kvmppc_mmu_hv_init(void) return 0; } -void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) -{ -} - static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) { kvmppc_set_msr(vcpu, MSR_SF | MSR_ME); @@ -451,7 +447,7 @@ static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r, } static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, - struct kvmppc_pte *gpte, bool data) + struct kvmppc_pte *gpte, bool data, bool iswrite) { struct kvm *kvm = vcpu->kvm; struct kvmppc_slb *slbe; @@ -906,21 +902,22 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, return 0; } -int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva) { if (kvm->arch.using_mmu_notifiers) kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); return 0; } -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end) { if (kvm->arch.using_mmu_notifiers) kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp); return 0; } -void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) +void kvmppc_core_flush_memslot_hv(struct kvm *kvm, + struct kvm_memory_slot *memslot) { unsigned long *rmapp; unsigned long gfn; @@ -994,7 +991,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, return ret; } -int kvm_age_hva(struct kvm *kvm, unsigned long hva) +int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva) { if (!kvm->arch.using_mmu_notifiers) return 0; @@ -1032,14 +1029,14 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, return ret; } -int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva) { if (!kvm->arch.using_mmu_notifiers) return 0; return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp); } -void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte) { if (!kvm->arch.using_mmu_notifiers) return; @@ -1512,9 +1509,8 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | (VRMA_VSID << SLB_VSID_SHIFT_1T); - lpcr = kvm->arch.lpcr & ~LPCR_VRMASD; - lpcr |= senc << (LPCR_VRMASD_SH - 4); - kvm->arch.lpcr = lpcr; + lpcr = senc << (LPCR_VRMASD_SH - 4); + kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD); rma_setup = 1; } ++i; diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 30c2f3b134c6..2c25f5412bdb 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -74,3 +74,4 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, /* Didn't find the liobn, punt it to userspace */ return H_TOO_HARD; } +EXPORT_SYMBOL_GPL(kvmppc_h_put_tce); diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 360ce68c9809..99d40f8977e8 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -86,8 +86,8 @@ static bool spr_allowed(struct kvm_vcpu *vcpu, enum priv_level level) return true; } -int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int inst, int *advance) +int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance) { int emulated = EMULATE_DONE; int rt = get_rt(inst); @@ -172,7 +172,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->arch.mmu.tlbie(vcpu, addr, large); break; } -#ifdef CONFIG_KVM_BOOK3S_64_PR +#ifdef CONFIG_PPC_BOOK3S_64 case OP_31_XOP_FAKE_SC1: { /* SC 1 papr hypercalls */ @@ -267,12 +267,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, r = kvmppc_st(vcpu, &addr, 32, zeros, true); if ((r == -ENOENT) || (r == -EPERM)) { - struct kvmppc_book3s_shadow_vcpu *svcpu; - - svcpu = svcpu_get(vcpu); *advance = 0; vcpu->arch.shared->dar = vaddr; - svcpu->fault_dar = vaddr; + vcpu->arch.fault_dar = vaddr; dsisr = DSISR_ISSTORE; if (r == -ENOENT) @@ -281,8 +278,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, dsisr |= DSISR_PROTFAULT; vcpu->arch.shared->dsisr = dsisr; - svcpu->fault_dsisr = dsisr; - svcpu_put(svcpu); + vcpu->arch.fault_dsisr = dsisr; kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); @@ -349,7 +345,7 @@ static struct kvmppc_bat *kvmppc_find_bat(struct kvm_vcpu *vcpu, int sprn) return bat; } -int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) +int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) { int emulated = EMULATE_DONE; @@ -472,7 +468,7 @@ unprivileged: return emulated; } -int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) +int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) { int emulated = EMULATE_DONE; diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c index 7057a02f0906..852989a9bad3 100644 --- a/arch/powerpc/kvm/book3s_exports.c +++ b/arch/powerpc/kvm/book3s_exports.c @@ -20,9 +20,10 @@ #include <linux/export.h> #include <asm/kvm_book3s.h> -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline); -#else +#endif +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline); EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu); #ifdef CONFIG_ALTIVEC diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 62a2b5ab08ed..072287f1c3bc 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -52,6 +52,9 @@ #include <linux/vmalloc.h> #include <linux/highmem.h> #include <linux/hugetlb.h> +#include <linux/module.h> + +#include "book3s.h" /* #define EXIT_DEBUG */ /* #define EXIT_DEBUG_SIMPLE */ @@ -66,7 +69,7 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); -void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) +static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) { int me; int cpu = vcpu->cpu; @@ -125,7 +128,7 @@ void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) * purely defensive; they should never fail.) */ -void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) { struct kvmppc_vcore *vc = vcpu->arch.vcore; @@ -143,7 +146,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) spin_unlock(&vcpu->arch.tbacct_lock); } -void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) { struct kvmppc_vcore *vc = vcpu->arch.vcore; @@ -155,17 +158,46 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) spin_unlock(&vcpu->arch.tbacct_lock); } -void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) +static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) { vcpu->arch.shregs.msr = msr; kvmppc_end_cede(vcpu); } -void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) +void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) { vcpu->arch.pvr = pvr; } +int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) +{ + unsigned long pcr = 0; + struct kvmppc_vcore *vc = vcpu->arch.vcore; + + if (arch_compat) { + if (!cpu_has_feature(CPU_FTR_ARCH_206)) + return -EINVAL; /* 970 has no compat mode support */ + + switch (arch_compat) { + case PVR_ARCH_205: + pcr = PCR_ARCH_205; + break; + case PVR_ARCH_206: + case PVR_ARCH_206p: + break; + default: + return -EINVAL; + } + } + + spin_lock(&vc->lock); + vc->arch_compat = arch_compat; + vc->pcr = pcr; + spin_unlock(&vc->lock); + + return 0; +} + void kvmppc_dump_regs(struct kvm_vcpu *vcpu) { int r; @@ -195,7 +227,7 @@ void kvmppc_dump_regs(struct kvm_vcpu *vcpu) pr_err(" ESID = %.16llx VSID = %.16llx\n", vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv); pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n", - vcpu->kvm->arch.lpcr, vcpu->kvm->arch.sdr1, + vcpu->arch.vcore->lpcr, vcpu->kvm->arch.sdr1, vcpu->arch.last_inst); } @@ -489,7 +521,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, memset(dt, 0, sizeof(struct dtl_entry)); dt->dispatch_reason = 7; dt->processor_id = vc->pcpu + vcpu->arch.ptid; - dt->timebase = now; + dt->timebase = now + vc->tb_offset; dt->enqueue_to_dispatch_time = stolen; dt->srr0 = kvmppc_get_pc(vcpu); dt->srr1 = vcpu->arch.shregs.msr; @@ -538,6 +570,15 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) } break; case H_CONFER: + target = kvmppc_get_gpr(vcpu, 4); + if (target == -1) + break; + tvcpu = kvmppc_find_vcpu(vcpu->kvm, target); + if (!tvcpu) { + ret = H_PARAMETER; + break; + } + kvm_vcpu_yield_to(tvcpu); break; case H_REGISTER_VPA: ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4), @@ -576,8 +617,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) return RESUME_GUEST; } -static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, - struct task_struct *tsk) +static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, + struct task_struct *tsk) { int r = RESUME_HOST; @@ -671,16 +712,16 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n", vcpu->arch.trap, kvmppc_get_pc(vcpu), vcpu->arch.shregs.msr); + run->hw.hardware_exit_reason = vcpu->arch.trap; r = RESUME_HOST; - BUG(); break; } return r; } -int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) +static int kvm_arch_vcpu_ioctl_get_sregs_hv(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) { int i; @@ -694,12 +735,12 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, return 0; } -int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) +static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) { int i, j; - kvmppc_set_pvr(vcpu, sregs->pvr); + kvmppc_set_pvr_hv(vcpu, sregs->pvr); j = 0; for (i = 0; i < vcpu->arch.slb_nr; i++) { @@ -714,7 +755,23 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return 0; } -int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) +static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr) +{ + struct kvmppc_vcore *vc = vcpu->arch.vcore; + u64 mask; + + spin_lock(&vc->lock); + /* + * Userspace can only modify DPFD (default prefetch depth), + * ILE (interrupt little-endian) and TC (translation control). + */ + mask = LPCR_DPFD | LPCR_ILE | LPCR_TC; + vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask); + spin_unlock(&vc->lock); +} + +static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { int r = 0; long int i; @@ -749,6 +806,12 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) i = id - KVM_REG_PPC_PMC1; *val = get_reg_val(id, vcpu->arch.pmc[i]); break; + case KVM_REG_PPC_SIAR: + *val = get_reg_val(id, vcpu->arch.siar); + break; + case KVM_REG_PPC_SDAR: + *val = get_reg_val(id, vcpu->arch.sdar); + break; #ifdef CONFIG_VSX case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: if (cpu_has_feature(CPU_FTR_VSX)) { @@ -787,6 +850,18 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) val->vpaval.length = vcpu->arch.dtl.len; spin_unlock(&vcpu->arch.vpa_update_lock); break; + case KVM_REG_PPC_TB_OFFSET: + *val = get_reg_val(id, vcpu->arch.vcore->tb_offset); + break; + case KVM_REG_PPC_LPCR: + *val = get_reg_val(id, vcpu->arch.vcore->lpcr); + break; + case KVM_REG_PPC_PPR: + *val = get_reg_val(id, vcpu->arch.ppr); + break; + case KVM_REG_PPC_ARCH_COMPAT: + *val = get_reg_val(id, vcpu->arch.vcore->arch_compat); + break; default: r = -EINVAL; break; @@ -795,7 +870,8 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) return r; } -int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) +static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { int r = 0; long int i; @@ -833,6 +909,12 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) i = id - KVM_REG_PPC_PMC1; vcpu->arch.pmc[i] = set_reg_val(id, *val); break; + case KVM_REG_PPC_SIAR: + vcpu->arch.siar = set_reg_val(id, *val); + break; + case KVM_REG_PPC_SDAR: + vcpu->arch.sdar = set_reg_val(id, *val); + break; #ifdef CONFIG_VSX case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: if (cpu_has_feature(CPU_FTR_VSX)) { @@ -880,6 +962,20 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) len -= len % sizeof(struct dtl_entry); r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len); break; + case KVM_REG_PPC_TB_OFFSET: + /* round up to multiple of 2^24 */ + vcpu->arch.vcore->tb_offset = + ALIGN(set_reg_val(id, *val), 1UL << 24); + break; + case KVM_REG_PPC_LPCR: + kvmppc_set_lpcr(vcpu, set_reg_val(id, *val)); + break; + case KVM_REG_PPC_PPR: + vcpu->arch.ppr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_ARCH_COMPAT: + r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val)); + break; default: r = -EINVAL; break; @@ -888,14 +984,8 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) return r; } -int kvmppc_core_check_processor_compat(void) -{ - if (cpu_has_feature(CPU_FTR_HVMODE)) - return 0; - return -EIO; -} - -struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, + unsigned int id) { struct kvm_vcpu *vcpu; int err = -EINVAL; @@ -919,8 +1009,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) vcpu->arch.mmcr[0] = MMCR0_FC; vcpu->arch.ctrl = CTRL_RUNLATCH; /* default to host PVR, since we can't spoof it */ - vcpu->arch.pvr = mfspr(SPRN_PVR); - kvmppc_set_pvr(vcpu, vcpu->arch.pvr); + kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR)); spin_lock_init(&vcpu->arch.vpa_update_lock); spin_lock_init(&vcpu->arch.tbacct_lock); vcpu->arch.busy_preempt = TB_NIL; @@ -940,6 +1029,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) spin_lock_init(&vcore->lock); init_waitqueue_head(&vcore->wq); vcore->preempt_tb = TB_NIL; + vcore->lpcr = kvm->arch.lpcr; } kvm->arch.vcores[core] = vcore; kvm->arch.online_vcores++; @@ -972,7 +1062,7 @@ static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa) vpa->dirty); } -void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu) { spin_lock(&vcpu->arch.vpa_update_lock); unpin_vpa(vcpu->kvm, &vcpu->arch.dtl); @@ -983,6 +1073,12 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) kmem_cache_free(kvm_vcpu_cache, vcpu); } +static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu) +{ + /* Indicate we want to get back into the guest */ + return 1; +} + static void kvmppc_set_timer(struct kvm_vcpu *vcpu) { unsigned long dec_nsec, now; @@ -1264,8 +1360,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) ret = RESUME_GUEST; if (vcpu->arch.trap) - ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu, - vcpu->arch.run_task); + ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu, + vcpu->arch.run_task); vcpu->arch.ret = ret; vcpu->arch.trap = 0; @@ -1424,7 +1520,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) return vcpu->arch.ret; } -int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) +static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) { int r; int srcu_idx; @@ -1546,7 +1642,8 @@ static const struct file_operations kvm_rma_fops = { .release = kvm_rma_release, }; -long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) +static long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, + struct kvm_allocate_rma *ret) { long fd; struct kvm_rma_info *ri; @@ -1592,7 +1689,8 @@ static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps, (*sps)++; } -int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) +static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm, + struct kvm_ppc_smmu_info *info) { struct kvm_ppc_one_seg_page_size *sps; @@ -1613,7 +1711,8 @@ int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) /* * Get (and clear) the dirty memory log for a memory slot. */ -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) +static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm, + struct kvm_dirty_log *log) { struct kvm_memory_slot *memslot; int r; @@ -1667,8 +1766,8 @@ static void unpin_slot(struct kvm_memory_slot *memslot) } } -void kvmppc_core_free_memslot(struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) +static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) { if (!dont || free->arch.rmap != dont->arch.rmap) { vfree(free->arch.rmap); @@ -1681,8 +1780,8 @@ void kvmppc_core_free_memslot(struct kvm_memory_slot *free, } } -int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, - unsigned long npages) +static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot, + unsigned long npages) { slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap)); if (!slot->arch.rmap) @@ -1692,9 +1791,9 @@ int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, return 0; } -int kvmppc_core_prepare_memory_region(struct kvm *kvm, - struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem) +static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm, + struct kvm_memory_slot *memslot, + struct kvm_userspace_memory_region *mem) { unsigned long *phys; @@ -1710,9 +1809,9 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, return 0; } -void kvmppc_core_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old) +static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old) { unsigned long npages = mem->memory_size >> PAGE_SHIFT; struct kvm_memory_slot *memslot; @@ -1729,6 +1828,37 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm, } } +/* + * Update LPCR values in kvm->arch and in vcores. + * Caller must hold kvm->lock. + */ +void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask) +{ + long int i; + u32 cores_done = 0; + + if ((kvm->arch.lpcr & mask) == lpcr) + return; + + kvm->arch.lpcr = (kvm->arch.lpcr & ~mask) | lpcr; + + for (i = 0; i < KVM_MAX_VCORES; ++i) { + struct kvmppc_vcore *vc = kvm->arch.vcores[i]; + if (!vc) + continue; + spin_lock(&vc->lock); + vc->lpcr = (vc->lpcr & ~mask) | lpcr; + spin_unlock(&vc->lock); + if (++cores_done >= kvm->arch.online_vcores) + break; + } +} + +static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu) +{ + return; +} + static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) { int err = 0; @@ -1737,7 +1867,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) unsigned long hva; struct kvm_memory_slot *memslot; struct vm_area_struct *vma; - unsigned long lpcr, senc; + unsigned long lpcr = 0, senc; + unsigned long lpcr_mask = 0; unsigned long psize, porder; unsigned long rma_size; unsigned long rmls; @@ -1802,9 +1933,9 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) senc = slb_pgsize_encoding(psize); kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | (VRMA_VSID << SLB_VSID_SHIFT_1T); - lpcr = kvm->arch.lpcr & ~LPCR_VRMASD; - lpcr |= senc << (LPCR_VRMASD_SH - 4); - kvm->arch.lpcr = lpcr; + lpcr_mask = LPCR_VRMASD; + /* the -4 is to account for senc values starting at 0x10 */ + lpcr = senc << (LPCR_VRMASD_SH - 4); /* Create HPTEs in the hash page table for the VRMA */ kvmppc_map_vrma(vcpu, memslot, porder); @@ -1825,23 +1956,21 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) kvm->arch.rma = ri; /* Update LPCR and RMOR */ - lpcr = kvm->arch.lpcr; if (cpu_has_feature(CPU_FTR_ARCH_201)) { /* PPC970; insert RMLS value (split field) in HID4 */ - lpcr &= ~((1ul << HID4_RMLS0_SH) | - (3ul << HID4_RMLS2_SH)); - lpcr |= ((rmls >> 2) << HID4_RMLS0_SH) | + lpcr_mask = (1ul << HID4_RMLS0_SH) | + (3ul << HID4_RMLS2_SH) | HID4_RMOR; + lpcr = ((rmls >> 2) << HID4_RMLS0_SH) | ((rmls & 3) << HID4_RMLS2_SH); /* RMOR is also in HID4 */ lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff) << HID4_RMOR_SH; } else { /* POWER7 */ - lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L); - lpcr |= rmls << LPCR_RMLS_SH; + lpcr_mask = LPCR_VPM0 | LPCR_VRMA_L | LPCR_RMLS; + lpcr = rmls << LPCR_RMLS_SH; kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT; } - kvm->arch.lpcr = lpcr; pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n", ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); @@ -1860,6 +1989,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) } } + kvmppc_update_lpcr(kvm, lpcr, lpcr_mask); + /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */ smp_wmb(); kvm->arch.rma_setup_done = 1; @@ -1875,7 +2006,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) goto out_srcu; } -int kvmppc_core_init_vm(struct kvm *kvm) +static int kvmppc_core_init_vm_hv(struct kvm *kvm) { unsigned long lpcr, lpid; @@ -1893,9 +2024,6 @@ int kvmppc_core_init_vm(struct kvm *kvm) */ cpumask_setall(&kvm->arch.need_tlb_flush); - INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); - INIT_LIST_HEAD(&kvm->arch.rtas_tokens); - kvm->arch.rma = NULL; kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); @@ -1931,61 +2059,162 @@ int kvmppc_core_init_vm(struct kvm *kvm) return 0; } -void kvmppc_core_destroy_vm(struct kvm *kvm) +static void kvmppc_free_vcores(struct kvm *kvm) +{ + long int i; + + for (i = 0; i < KVM_MAX_VCORES; ++i) + kfree(kvm->arch.vcores[i]); + kvm->arch.online_vcores = 0; +} + +static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) { uninhibit_secondary_onlining(); + kvmppc_free_vcores(kvm); if (kvm->arch.rma) { kvm_release_rma(kvm->arch.rma); kvm->arch.rma = NULL; } - kvmppc_rtas_tokens_free(kvm); - kvmppc_free_hpt(kvm); - WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); } -/* These are stubs for now */ -void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) +/* We don't need to emulate any privileged instructions or dcbz */ +static int kvmppc_core_emulate_op_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance) { + return EMULATE_FAIL; } -/* We don't need to emulate any privileged instructions or dcbz */ -int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int inst, int *advance) +static int kvmppc_core_emulate_mtspr_hv(struct kvm_vcpu *vcpu, int sprn, + ulong spr_val) { return EMULATE_FAIL; } -int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) +static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn, + ulong *spr_val) { return EMULATE_FAIL; } -int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) +static int kvmppc_core_check_processor_compat_hv(void) { - return EMULATE_FAIL; + if (!cpu_has_feature(CPU_FTR_HVMODE)) + return -EIO; + return 0; } -static int kvmppc_book3s_hv_init(void) +static long kvm_arch_vm_ioctl_hv(struct file *filp, + unsigned int ioctl, unsigned long arg) { - int r; + struct kvm *kvm __maybe_unused = filp->private_data; + void __user *argp = (void __user *)arg; + long r; - r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); + switch (ioctl) { - if (r) + case KVM_ALLOCATE_RMA: { + struct kvm_allocate_rma rma; + struct kvm *kvm = filp->private_data; + + r = kvm_vm_ioctl_allocate_rma(kvm, &rma); + if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma))) + r = -EFAULT; + break; + } + + case KVM_PPC_ALLOCATE_HTAB: { + u32 htab_order; + + r = -EFAULT; + if (get_user(htab_order, (u32 __user *)argp)) + break; + r = kvmppc_alloc_reset_hpt(kvm, &htab_order); + if (r) + break; + r = -EFAULT; + if (put_user(htab_order, (u32 __user *)argp)) + break; + r = 0; + break; + } + + case KVM_PPC_GET_HTAB_FD: { + struct kvm_get_htab_fd ghf; + + r = -EFAULT; + if (copy_from_user(&ghf, argp, sizeof(ghf))) + break; + r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf); + break; + } + + default: + r = -ENOTTY; + } + + return r; +} + +static struct kvmppc_ops kvm_ops_hv = { + .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, + .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, + .get_one_reg = kvmppc_get_one_reg_hv, + .set_one_reg = kvmppc_set_one_reg_hv, + .vcpu_load = kvmppc_core_vcpu_load_hv, + .vcpu_put = kvmppc_core_vcpu_put_hv, + .set_msr = kvmppc_set_msr_hv, + .vcpu_run = kvmppc_vcpu_run_hv, + .vcpu_create = kvmppc_core_vcpu_create_hv, + .vcpu_free = kvmppc_core_vcpu_free_hv, + .check_requests = kvmppc_core_check_requests_hv, + .get_dirty_log = kvm_vm_ioctl_get_dirty_log_hv, + .flush_memslot = kvmppc_core_flush_memslot_hv, + .prepare_memory_region = kvmppc_core_prepare_memory_region_hv, + .commit_memory_region = kvmppc_core_commit_memory_region_hv, + .unmap_hva = kvm_unmap_hva_hv, + .unmap_hva_range = kvm_unmap_hva_range_hv, + .age_hva = kvm_age_hva_hv, + .test_age_hva = kvm_test_age_hva_hv, + .set_spte_hva = kvm_set_spte_hva_hv, + .mmu_destroy = kvmppc_mmu_destroy_hv, + .free_memslot = kvmppc_core_free_memslot_hv, + .create_memslot = kvmppc_core_create_memslot_hv, + .init_vm = kvmppc_core_init_vm_hv, + .destroy_vm = kvmppc_core_destroy_vm_hv, + .get_smmu_info = kvm_vm_ioctl_get_smmu_info_hv, + .emulate_op = kvmppc_core_emulate_op_hv, + .emulate_mtspr = kvmppc_core_emulate_mtspr_hv, + .emulate_mfspr = kvmppc_core_emulate_mfspr_hv, + .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv, + .arch_vm_ioctl = kvm_arch_vm_ioctl_hv, +}; + +static int kvmppc_book3s_init_hv(void) +{ + int r; + /* + * FIXME!! Do we need to check on all cpus ? + */ + r = kvmppc_core_check_processor_compat_hv(); + if (r < 0) return r; - r = kvmppc_mmu_hv_init(); + kvm_ops_hv.owner = THIS_MODULE; + kvmppc_hv_ops = &kvm_ops_hv; + r = kvmppc_mmu_hv_init(); return r; } -static void kvmppc_book3s_hv_exit(void) +static void kvmppc_book3s_exit_hv(void) { - kvm_exit(); + kvmppc_hv_ops = NULL; } -module_init(kvmppc_book3s_hv_init); -module_exit(kvmppc_book3s_hv_exit); +module_init(kvmppc_book3s_init_hv); +module_exit(kvmppc_book3s_exit_hv); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 37f1cc417ca0..928142c64cb0 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -158,9 +158,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) * Interrupts are enabled again at this point. */ -.global kvmppc_handler_highmem -kvmppc_handler_highmem: - /* * Register usage at this point: * diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 294b7af28cdd..bc8de75b1925 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -33,30 +33,6 @@ #error Need to fix lppaca and SLB shadow accesses in little endian mode #endif -/***************************************************************************** - * * - * Real Mode handlers that need to be in the linear mapping * - * * - ****************************************************************************/ - - .globl kvmppc_skip_interrupt -kvmppc_skip_interrupt: - mfspr r13,SPRN_SRR0 - addi r13,r13,4 - mtspr SPRN_SRR0,r13 - GET_SCRATCH0(r13) - rfid - b . - - .globl kvmppc_skip_Hinterrupt -kvmppc_skip_Hinterrupt: - mfspr r13,SPRN_HSRR0 - addi r13,r13,4 - mtspr SPRN_HSRR0,r13 - GET_SCRATCH0(r13) - hrfid - b . - /* * Call kvmppc_hv_entry in real mode. * Must be called with interrupts hard-disabled. @@ -66,8 +42,11 @@ kvmppc_skip_Hinterrupt: * LR = return address to continue at after eventually re-enabling MMU */ _GLOBAL(kvmppc_hv_entry_trampoline) + mflr r0 + std r0, PPC_LR_STKOFF(r1) + stdu r1, -112(r1) mfmsr r10 - LOAD_REG_ADDR(r5, kvmppc_hv_entry) + LOAD_REG_ADDR(r5, kvmppc_call_hv_entry) li r0,MSR_RI andc r0,r10,r0 li r6,MSR_IR | MSR_DR @@ -77,11 +56,103 @@ _GLOBAL(kvmppc_hv_entry_trampoline) mtsrr1 r6 RFI -/****************************************************************************** - * * - * Entry code * - * * - *****************************************************************************/ +kvmppc_call_hv_entry: + bl kvmppc_hv_entry + + /* Back from guest - restore host state and return to caller */ + + /* Restore host DABR and DABRX */ + ld r5,HSTATE_DABR(r13) + li r6,7 + mtspr SPRN_DABR,r5 + mtspr SPRN_DABRX,r6 + + /* Restore SPRG3 */ + ld r3,PACA_SPRG3(r13) + mtspr SPRN_SPRG3,r3 + + /* + * Reload DEC. HDEC interrupts were disabled when + * we reloaded the host's LPCR value. + */ + ld r3, HSTATE_DECEXP(r13) + mftb r4 + subf r4, r4, r3 + mtspr SPRN_DEC, r4 + + /* Reload the host's PMU registers */ + ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ + lbz r4, LPPACA_PMCINUSE(r3) + cmpwi r4, 0 + beq 23f /* skip if not */ + lwz r3, HSTATE_PMC(r13) + lwz r4, HSTATE_PMC + 4(r13) + lwz r5, HSTATE_PMC + 8(r13) + lwz r6, HSTATE_PMC + 12(r13) + lwz r8, HSTATE_PMC + 16(r13) + lwz r9, HSTATE_PMC + 20(r13) +BEGIN_FTR_SECTION + lwz r10, HSTATE_PMC + 24(r13) + lwz r11, HSTATE_PMC + 28(r13) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + mtspr SPRN_PMC1, r3 + mtspr SPRN_PMC2, r4 + mtspr SPRN_PMC3, r5 + mtspr SPRN_PMC4, r6 + mtspr SPRN_PMC5, r8 + mtspr SPRN_PMC6, r9 +BEGIN_FTR_SECTION + mtspr SPRN_PMC7, r10 + mtspr SPRN_PMC8, r11 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + ld r3, HSTATE_MMCR(r13) + ld r4, HSTATE_MMCR + 8(r13) + ld r5, HSTATE_MMCR + 16(r13) + mtspr SPRN_MMCR1, r4 + mtspr SPRN_MMCRA, r5 + mtspr SPRN_MMCR0, r3 + isync +23: + + /* + * For external and machine check interrupts, we need + * to call the Linux handler to process the interrupt. + * We do that by jumping to absolute address 0x500 for + * external interrupts, or the machine_check_fwnmi label + * for machine checks (since firmware might have patched + * the vector area at 0x200). The [h]rfid at the end of the + * handler will return to the book3s_hv_interrupts.S code. + * For other interrupts we do the rfid to get back + * to the book3s_hv_interrupts.S code here. + */ + ld r8, 112+PPC_LR_STKOFF(r1) + addi r1, r1, 112 + ld r7, HSTATE_HOST_MSR(r13) + + cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK + cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL +BEGIN_FTR_SECTION + beq 11f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + + /* RFI into the highmem handler, or branch to interrupt handler */ + mfmsr r6 + li r0, MSR_RI + andc r6, r6, r0 + mtmsrd r6, 1 /* Clear RI in MSR */ + mtsrr0 r8 + mtsrr1 r7 + beqa 0x500 /* external interrupt (PPC970) */ + beq cr1, 13f /* machine check */ + RFI + + /* On POWER7, we have external interrupts set to use HSRR0/1 */ +11: mtspr SPRN_HSRR0, r8 + mtspr SPRN_HSRR1, r7 + ba 0x500 + +13: b machine_check_fwnmi + /* * We come in here when wakened from nap mode on a secondary hw thread. @@ -137,7 +208,7 @@ kvm_start_guest: cmpdi r4,0 /* if we have no vcpu to run, go back to sleep */ beq kvm_no_guest - b kvmppc_hv_entry + b 30f 27: /* XXX should handle hypervisor maintenance interrupts etc. here */ b kvm_no_guest @@ -147,6 +218,57 @@ kvm_start_guest: stw r8,HSTATE_SAVED_XIRR(r13) b kvm_no_guest +30: bl kvmppc_hv_entry + + /* Back from the guest, go back to nap */ + /* Clear our vcpu pointer so we don't come back in early */ + li r0, 0 + std r0, HSTATE_KVM_VCPU(r13) + lwsync + /* Clear any pending IPI - we're an offline thread */ + ld r5, HSTATE_XICS_PHYS(r13) + li r7, XICS_XIRR + lwzcix r3, r5, r7 /* ack any pending interrupt */ + rlwinm. r0, r3, 0, 0xffffff /* any pending? */ + beq 37f + sync + li r0, 0xff + li r6, XICS_MFRR + stbcix r0, r5, r6 /* clear the IPI */ + stwcix r3, r5, r7 /* EOI it */ +37: sync + + /* increment the nap count and then go to nap mode */ + ld r4, HSTATE_KVM_VCORE(r13) + addi r4, r4, VCORE_NAP_COUNT + lwsync /* make previous updates visible */ +51: lwarx r3, 0, r4 + addi r3, r3, 1 + stwcx. r3, 0, r4 + bne 51b + +kvm_no_guest: + li r0, KVM_HWTHREAD_IN_NAP + stb r0, HSTATE_HWTHREAD_STATE(r13) + li r3, LPCR_PECE0 + mfspr r4, SPRN_LPCR + rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 + mtspr SPRN_LPCR, r4 + isync + std r0, HSTATE_SCRATCH0(r13) + ptesync + ld r0, HSTATE_SCRATCH0(r13) +1: cmpd r0, r0 + bne 1b + nap + b . + +/****************************************************************************** + * * + * Entry code * + * * + *****************************************************************************/ + .global kvmppc_hv_entry kvmppc_hv_entry: @@ -159,7 +281,8 @@ kvmppc_hv_entry: * all other volatile GPRS = free */ mflr r0 - std r0, HSTATE_VMHANDLER(r13) + std r0, PPC_LR_STKOFF(r1) + stdu r1, -112(r1) /* Set partition DABR */ /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ @@ -200,8 +323,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) ld r3, VCPU_MMCR(r4) ld r5, VCPU_MMCR + 8(r4) ld r6, VCPU_MMCR + 16(r4) + ld r7, VCPU_SIAR(r4) + ld r8, VCPU_SDAR(r4) mtspr SPRN_MMCR1, r5 mtspr SPRN_MMCRA, r6 + mtspr SPRN_SIAR, r7 + mtspr SPRN_SDAR, r8 mtspr SPRN_MMCR0, r3 isync @@ -254,22 +381,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Save R1 in the PACA */ std r1, HSTATE_HOST_R1(r13) - /* Increment yield count if they have a VPA */ - ld r3, VCPU_VPA(r4) - cmpdi r3, 0 - beq 25f - lwz r5, LPPACA_YIELDCOUNT(r3) - addi r5, r5, 1 - stw r5, LPPACA_YIELDCOUNT(r3) - li r6, 1 - stb r6, VCPU_VPA_DIRTY(r4) -25: /* Load up DAR and DSISR */ ld r5, VCPU_DAR(r4) lwz r6, VCPU_DSISR(r4) mtspr SPRN_DAR, r5 mtspr SPRN_DSISR, r6 + li r6, KVM_GUEST_MODE_HOST_HV + stb r6, HSTATE_IN_GUEST(r13) + BEGIN_FTR_SECTION /* Restore AMR and UAMOR, set AMOR to all 1s */ ld r5,VCPU_AMR(r4) @@ -343,7 +463,28 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) bdnz 28b ptesync -22: li r0,1 + /* Add timebase offset onto timebase */ +22: ld r8,VCORE_TB_OFFSET(r5) + cmpdi r8,0 + beq 37f + mftb r6 /* current host timebase */ + add r8,r8,r6 + mtspr SPRN_TBU40,r8 /* update upper 40 bits */ + mftb r7 /* check if lower 24 bits overflowed */ + clrldi r6,r6,40 + clrldi r7,r7,40 + cmpld r7,r6 + bge 37f + addis r8,r8,0x100 /* if so, increment upper 40 bits */ + mtspr SPRN_TBU40,r8 + + /* Load guest PCR value to select appropriate compat mode */ +37: ld r7, VCORE_PCR(r5) + cmpdi r7, 0 + beq 38f + mtspr SPRN_PCR, r7 +38: + li r0,1 stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */ b 10f @@ -353,12 +494,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) beq 20b /* Set LPCR and RMOR. */ -10: ld r8,KVM_LPCR(r9) +10: ld r8,VCORE_LPCR(r5) mtspr SPRN_LPCR,r8 ld r8,KVM_RMOR(r9) mtspr SPRN_RMOR,r8 isync + /* Increment yield count if they have a VPA */ + ld r3, VCPU_VPA(r4) + cmpdi r3, 0 + beq 25f + lwz r5, LPPACA_YIELDCOUNT(r3) + addi r5, r5, 1 + stw r5, LPPACA_YIELDCOUNT(r3) + li r6, 1 + stb r6, VCPU_VPA_DIRTY(r4) +25: /* Check if HDEC expires soon */ mfspr r3,SPRN_HDEC cmpwi r3,10 @@ -405,7 +556,8 @@ toc_tlbie_lock: bne 24b isync - ld r7,KVM_LPCR(r9) /* use kvm->arch.lpcr to store HID4 */ + ld r5,HSTATE_KVM_VCORE(r13) + ld r7,VCORE_LPCR(r5) /* use vcore->lpcr to store HID4 */ li r0,0x18f rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */ or r0,r7,r0 @@ -541,7 +693,7 @@ fast_guest_return: mtspr SPRN_HSRR1,r11 /* Activate guest mode, so faults get handled by KVM */ - li r9, KVM_GUEST_MODE_GUEST + li r9, KVM_GUEST_MODE_GUEST_HV stb r9, HSTATE_IN_GUEST(r13) /* Enter guest */ @@ -550,13 +702,15 @@ BEGIN_FTR_SECTION ld r5, VCPU_CFAR(r4) mtspr SPRN_CFAR, r5 END_FTR_SECTION_IFSET(CPU_FTR_CFAR) +BEGIN_FTR_SECTION + ld r0, VCPU_PPR(r4) +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r5, VCPU_LR(r4) lwz r6, VCPU_CR(r4) mtlr r5 mtcr r6 - ld r0, VCPU_GPR(R0)(r4) ld r1, VCPU_GPR(R1)(r4) ld r2, VCPU_GPR(R2)(r4) ld r3, VCPU_GPR(R3)(r4) @@ -570,6 +724,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) ld r12, VCPU_GPR(R12)(r4) ld r13, VCPU_GPR(R13)(r4) +BEGIN_FTR_SECTION + mtspr SPRN_PPR, r0 +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + ld r0, VCPU_GPR(R0)(r4) ld r4, VCPU_GPR(R4)(r4) hrfid @@ -584,8 +742,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) /* * We come here from the first-level interrupt handlers. */ - .globl kvmppc_interrupt -kvmppc_interrupt: + .globl kvmppc_interrupt_hv +kvmppc_interrupt_hv: /* * Register contents: * R12 = interrupt vector @@ -595,6 +753,19 @@ kvmppc_interrupt: */ /* abuse host_r2 as third scratch area; we get r2 from PACATOC(r13) */ std r9, HSTATE_HOST_R2(r13) + + lbz r9, HSTATE_IN_GUEST(r13) + cmpwi r9, KVM_GUEST_MODE_HOST_HV + beq kvmppc_bad_host_intr +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE + cmpwi r9, KVM_GUEST_MODE_GUEST + ld r9, HSTATE_HOST_R2(r13) + beq kvmppc_interrupt_pr +#endif + /* We're now back in the host but in guest MMU context */ + li r9, KVM_GUEST_MODE_HOST_HV + stb r9, HSTATE_IN_GUEST(r13) + ld r9, HSTATE_KVM_VCPU(r13) /* Save registers */ @@ -620,6 +791,10 @@ BEGIN_FTR_SECTION ld r3, HSTATE_CFAR(r13) std r3, VCPU_CFAR(r9) END_FTR_SECTION_IFSET(CPU_FTR_CFAR) +BEGIN_FTR_SECTION + ld r4, HSTATE_PPR(r13) + std r4, VCPU_PPR(r9) +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) /* Restore R1/R2 so we can handle faults */ ld r1, HSTATE_HOST_R1(r13) @@ -642,10 +817,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) std r3, VCPU_GPR(R13)(r9) std r4, VCPU_LR(r9) - /* Unset guest mode */ - li r0, KVM_GUEST_MODE_NONE - stb r0, HSTATE_IN_GUEST(r13) - stw r12,VCPU_TRAP(r9) /* Save HEIR (HV emulation assist reg) in last_inst @@ -696,46 +867,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) * set, we know the host wants us out so let's do it now */ do_ext_interrupt: - lbz r0, HSTATE_HOST_IPI(r13) - cmpwi r0, 0 - bne ext_interrupt_to_host - - /* Now read the interrupt from the ICP */ - ld r5, HSTATE_XICS_PHYS(r13) - li r7, XICS_XIRR - cmpdi r5, 0 - beq- ext_interrupt_to_host - lwzcix r3, r5, r7 - rlwinm. r0, r3, 0, 0xffffff - sync - beq 3f /* if nothing pending in the ICP */ - - /* We found something in the ICP... - * - * If it's not an IPI, stash it in the PACA and return to - * the host, we don't (yet) handle directing real external - * interrupts directly to the guest - */ - cmpwi r0, XICS_IPI - bne ext_stash_for_host - - /* It's an IPI, clear the MFRR and EOI it */ - li r0, 0xff - li r6, XICS_MFRR - stbcix r0, r5, r6 /* clear the IPI */ - stwcix r3, r5, r7 /* EOI it */ - sync - - /* We need to re-check host IPI now in case it got set in the - * meantime. If it's clear, we bounce the interrupt to the - * guest - */ - lbz r0, HSTATE_HOST_IPI(r13) - cmpwi r0, 0 - bne- 1f + bl kvmppc_read_intr + cmpdi r3, 0 + bgt ext_interrupt_to_host /* Allright, looks like an IPI for the guest, we need to set MER */ -3: /* Check if any CPU is heading out to the host, if so head out too */ ld r5, HSTATE_KVM_VCORE(r13) lwz r0, VCORE_ENTRY_EXIT(r5) @@ -764,27 +900,9 @@ do_ext_interrupt: mtspr SPRN_LPCR, r8 b fast_guest_return - /* We raced with the host, we need to resend that IPI, bummer */ -1: li r0, IPI_PRIORITY - stbcix r0, r5, r6 /* set the IPI */ - sync - b ext_interrupt_to_host - -ext_stash_for_host: - /* It's not an IPI and it's for the host, stash it in the PACA - * before exit, it will be picked up by the host ICP driver - */ - stw r3, HSTATE_SAVED_XIRR(r13) ext_interrupt_to_host: guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ - /* Save DEC */ - mfspr r5,SPRN_DEC - mftb r6 - extsw r5,r5 - add r5,r5,r6 - std r5,VCPU_DEC_EXPIRES(r9) - /* Save more register state */ mfdar r6 mfdsisr r7 @@ -954,7 +1072,30 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) mtspr SPRN_SDR1,r6 /* switch to partition page table */ mtspr SPRN_LPID,r7 isync - li r0,0 + + /* Subtract timebase offset from timebase */ + ld r8,VCORE_TB_OFFSET(r5) + cmpdi r8,0 + beq 17f + mftb r6 /* current host timebase */ + subf r8,r8,r6 + mtspr SPRN_TBU40,r8 /* update upper 40 bits */ + mftb r7 /* check if lower 24 bits overflowed */ + clrldi r6,r6,40 + clrldi r7,r7,40 + cmpld r7,r6 + bge 17f + addis r8,r8,0x100 /* if so, increment upper 40 bits */ + mtspr SPRN_TBU40,r8 + + /* Reset PCR */ +17: ld r0, VCORE_PCR(r5) + cmpdi r0, 0 + beq 18f + li r0, 0 + mtspr SPRN_PCR, r0 +18: + /* Signal secondary CPUs to continue */ stb r0,VCORE_IN_GUEST(r5) lis r8,0x7fff /* MAX_INT@h */ mtspr SPRN_HDEC,r8 @@ -1052,6 +1193,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 1: addi r8,r8,16 .endr + /* Save DEC */ + mfspr r5,SPRN_DEC + mftb r6 + extsw r5,r5 + add r5,r5,r6 + std r5,VCPU_DEC_EXPIRES(r9) + /* Save and reset AMR and UAMOR before turning on the MMU */ BEGIN_FTR_SECTION mfspr r5,SPRN_AMR @@ -1062,11 +1210,15 @@ BEGIN_FTR_SECTION mtspr SPRN_AMR,r6 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + /* Unset guest mode */ + li r0, KVM_GUEST_MODE_NONE + stb r0, HSTATE_IN_GUEST(r13) + /* Switch DSCR back to host value */ BEGIN_FTR_SECTION mfspr r8, SPRN_DSCR ld r7, HSTATE_DSCR(r13) - std r8, VCPU_DSCR(r7) + std r8, VCPU_DSCR(r9) mtspr SPRN_DSCR, r7 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) @@ -1134,9 +1286,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */ b 22f 21: mfspr r5, SPRN_MMCR1 + mfspr r7, SPRN_SIAR + mfspr r8, SPRN_SDAR std r4, VCPU_MMCR(r9) std r5, VCPU_MMCR + 8(r9) std r6, VCPU_MMCR + 16(r9) + std r7, VCPU_SIAR(r9) + std r8, VCPU_SDAR(r9) mfspr r3, SPRN_PMC1 mfspr r4, SPRN_PMC2 mfspr r5, SPRN_PMC3 @@ -1158,103 +1314,30 @@ BEGIN_FTR_SECTION stw r11, VCPU_PMC + 28(r9) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 22: + ld r0, 112+PPC_LR_STKOFF(r1) + addi r1, r1, 112 + mtlr r0 + blr +secondary_too_late: + ld r5,HSTATE_KVM_VCORE(r13) + HMT_LOW +13: lbz r3,VCORE_IN_GUEST(r5) + cmpwi r3,0 + bne 13b + HMT_MEDIUM + li r0, KVM_GUEST_MODE_NONE + stb r0, HSTATE_IN_GUEST(r13) + ld r11,PACA_SLBSHADOWPTR(r13) - /* Secondary threads go off to take a nap on POWER7 */ -BEGIN_FTR_SECTION - lwz r0,VCPU_PTID(r9) - cmpwi r0,0 - bne secondary_nap -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) - - /* Restore host DABR and DABRX */ - ld r5,HSTATE_DABR(r13) - li r6,7 - mtspr SPRN_DABR,r5 - mtspr SPRN_DABRX,r6 - - /* Restore SPRG3 */ - ld r3,PACA_SPRG3(r13) - mtspr SPRN_SPRG3,r3 - - /* - * Reload DEC. HDEC interrupts were disabled when - * we reloaded the host's LPCR value. - */ - ld r3, HSTATE_DECEXP(r13) - mftb r4 - subf r4, r4, r3 - mtspr SPRN_DEC, r4 - - /* Reload the host's PMU registers */ - ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ - lbz r4, LPPACA_PMCINUSE(r3) - cmpwi r4, 0 - beq 23f /* skip if not */ - lwz r3, HSTATE_PMC(r13) - lwz r4, HSTATE_PMC + 4(r13) - lwz r5, HSTATE_PMC + 8(r13) - lwz r6, HSTATE_PMC + 12(r13) - lwz r8, HSTATE_PMC + 16(r13) - lwz r9, HSTATE_PMC + 20(r13) -BEGIN_FTR_SECTION - lwz r10, HSTATE_PMC + 24(r13) - lwz r11, HSTATE_PMC + 28(r13) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) - mtspr SPRN_PMC1, r3 - mtspr SPRN_PMC2, r4 - mtspr SPRN_PMC3, r5 - mtspr SPRN_PMC4, r6 - mtspr SPRN_PMC5, r8 - mtspr SPRN_PMC6, r9 -BEGIN_FTR_SECTION - mtspr SPRN_PMC7, r10 - mtspr SPRN_PMC8, r11 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) - ld r3, HSTATE_MMCR(r13) - ld r4, HSTATE_MMCR + 8(r13) - ld r5, HSTATE_MMCR + 16(r13) - mtspr SPRN_MMCR1, r4 - mtspr SPRN_MMCRA, r5 - mtspr SPRN_MMCR0, r3 - isync -23: - /* - * For external and machine check interrupts, we need - * to call the Linux handler to process the interrupt. - * We do that by jumping to absolute address 0x500 for - * external interrupts, or the machine_check_fwnmi label - * for machine checks (since firmware might have patched - * the vector area at 0x200). The [h]rfid at the end of the - * handler will return to the book3s_hv_interrupts.S code. - * For other interrupts we do the rfid to get back - * to the book3s_hv_interrupts.S code here. - */ - ld r8, HSTATE_VMHANDLER(r13) - ld r7, HSTATE_HOST_MSR(r13) - - cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK - cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL -BEGIN_FTR_SECTION - beq 11f -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) - - /* RFI into the highmem handler, or branch to interrupt handler */ - mfmsr r6 - li r0, MSR_RI - andc r6, r6, r0 - mtmsrd r6, 1 /* Clear RI in MSR */ - mtsrr0 r8 - mtsrr1 r7 - beqa 0x500 /* external interrupt (PPC970) */ - beq cr1, 13f /* machine check */ - RFI - - /* On POWER7, we have external interrupts set to use HSRR0/1 */ -11: mtspr SPRN_HSRR0, r8 - mtspr SPRN_HSRR1, r7 - ba 0x500 - -13: b machine_check_fwnmi + .rept SLB_NUM_BOLTED + ld r5,SLBSHADOW_SAVEAREA(r11) + ld r6,SLBSHADOW_SAVEAREA+8(r11) + andis. r7,r5,SLB_ESID_V@h + beq 1f + slbmte r6,r5 +1: addi r11,r11,16 + .endr + b 22b /* * Check whether an HDSI is an HPTE not found fault or something else. @@ -1333,7 +1416,7 @@ fast_interrupt_c_return: stw r8, VCPU_LAST_INST(r9) /* Unset guest mode. */ - li r0, KVM_GUEST_MODE_NONE + li r0, KVM_GUEST_MODE_HOST_HV stb r0, HSTATE_IN_GUEST(r13) b guest_exit_cont @@ -1701,67 +1784,70 @@ machine_check_realmode: rotldi r11, r11, 63 b fast_interrupt_c_return -secondary_too_late: - ld r5,HSTATE_KVM_VCORE(r13) - HMT_LOW -13: lbz r3,VCORE_IN_GUEST(r5) - cmpwi r3,0 - bne 13b - HMT_MEDIUM - ld r11,PACA_SLBSHADOWPTR(r13) - - .rept SLB_NUM_BOLTED - ld r5,SLBSHADOW_SAVEAREA(r11) - ld r6,SLBSHADOW_SAVEAREA+8(r11) - andis. r7,r5,SLB_ESID_V@h - beq 1f - slbmte r6,r5 -1: addi r11,r11,16 - .endr +/* + * Determine what sort of external interrupt is pending (if any). + * Returns: + * 0 if no interrupt is pending + * 1 if an interrupt is pending that needs to be handled by the host + * -1 if there was a guest wakeup IPI (which has now been cleared) + */ +kvmppc_read_intr: + /* see if a host IPI is pending */ + li r3, 1 + lbz r0, HSTATE_HOST_IPI(r13) + cmpwi r0, 0 + bne 1f -secondary_nap: - /* Clear our vcpu pointer so we don't come back in early */ - li r0, 0 - std r0, HSTATE_KVM_VCPU(r13) - lwsync - /* Clear any pending IPI - assume we're a secondary thread */ - ld r5, HSTATE_XICS_PHYS(r13) + /* Now read the interrupt from the ICP */ + ld r6, HSTATE_XICS_PHYS(r13) li r7, XICS_XIRR - lwzcix r3, r5, r7 /* ack any pending interrupt */ - rlwinm. r0, r3, 0, 0xffffff /* any pending? */ - beq 37f + cmpdi r6, 0 + beq- 1f + lwzcix r0, r6, r7 + rlwinm. r3, r0, 0, 0xffffff sync - li r0, 0xff - li r6, XICS_MFRR - stbcix r0, r5, r6 /* clear the IPI */ - stwcix r3, r5, r7 /* EOI it */ -37: sync + beq 1f /* if nothing pending in the ICP */ - /* increment the nap count and then go to nap mode */ - ld r4, HSTATE_KVM_VCORE(r13) - addi r4, r4, VCORE_NAP_COUNT - lwsync /* make previous updates visible */ -51: lwarx r3, 0, r4 - addi r3, r3, 1 - stwcx. r3, 0, r4 - bne 51b + /* We found something in the ICP... + * + * If it's not an IPI, stash it in the PACA and return to + * the host, we don't (yet) handle directing real external + * interrupts directly to the guest + */ + cmpwi r3, XICS_IPI /* if there is, is it an IPI? */ + li r3, 1 + bne 42f -kvm_no_guest: - li r0, KVM_HWTHREAD_IN_NAP - stb r0, HSTATE_HWTHREAD_STATE(r13) + /* It's an IPI, clear the MFRR and EOI it */ + li r3, 0xff + li r8, XICS_MFRR + stbcix r3, r6, r8 /* clear the IPI */ + stwcix r0, r6, r7 /* EOI it */ + sync - li r3, LPCR_PECE0 - mfspr r4, SPRN_LPCR - rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 - mtspr SPRN_LPCR, r4 - isync - std r0, HSTATE_SCRATCH0(r13) - ptesync - ld r0, HSTATE_SCRATCH0(r13) -1: cmpd r0, r0 - bne 1b - nap - b . + /* We need to re-check host IPI now in case it got set in the + * meantime. If it's clear, we bounce the interrupt to the + * guest + */ + lbz r0, HSTATE_HOST_IPI(r13) + cmpwi r0, 0 + bne- 43f + + /* OK, it's an IPI for us */ + li r3, -1 +1: blr + +42: /* It's not an IPI and it's for the host, stash it in the PACA + * before exit, it will be picked up by the host ICP driver + */ + stw r0, HSTATE_SAVED_XIRR(r13) + b 1b + +43: /* We raced with the host, we need to resend that IPI, bummer */ + li r0, IPI_PRIORITY + stbcix r0, r6, r8 /* set the IPI */ + sync + b 1b /* * Save away FP, VMX and VSX registers. @@ -1879,3 +1965,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) lwz r7,VCPU_VRSAVE(r4) mtspr SPRN_VRSAVE,r7 blr + +/* + * We come here if we get any exception or interrupt while we are + * executing host real mode code while in guest MMU context. + * For now just spin, but we should do something better. + */ +kvmppc_bad_host_intr: + b . diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index 17cfae5497a3..f4dd041c14ea 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -26,8 +26,12 @@ #if defined(CONFIG_PPC_BOOK3S_64) #define FUNC(name) GLUE(.,name) +#define GET_SHADOW_VCPU(reg) addi reg, r13, PACA_SVCPU + #elif defined(CONFIG_PPC_BOOK3S_32) #define FUNC(name) name +#define GET_SHADOW_VCPU(reg) lwz reg, (THREAD + THREAD_KVM_SVCPU)(r2) + #endif /* CONFIG_PPC_BOOK3S_XX */ #define VCPU_LOAD_NVGPRS(vcpu) \ @@ -87,8 +91,14 @@ kvm_start_entry: VCPU_LOAD_NVGPRS(r4) kvm_start_lightweight: + /* Copy registers into shadow vcpu so we can access them in real mode */ + GET_SHADOW_VCPU(r3) + bl FUNC(kvmppc_copy_to_svcpu) + nop + REST_GPR(4, r1) #ifdef CONFIG_PPC_BOOK3S_64 + /* Get the dcbz32 flag */ PPC_LL r3, VCPU_HFLAGS(r4) rldicl r3, r3, 0, 63 /* r3 &= 1 */ stb r3, HSTATE_RESTORE_HID5(r13) @@ -111,9 +121,6 @@ kvm_start_lightweight: * */ -.global kvmppc_handler_highmem -kvmppc_handler_highmem: - /* * Register usage at this point: * @@ -125,18 +132,31 @@ kvmppc_handler_highmem: * */ - /* R7 = vcpu */ - PPC_LL r7, GPR4(r1) + /* Transfer reg values from shadow vcpu back to vcpu struct */ + /* On 64-bit, interrupts are still off at this point */ + PPC_LL r3, GPR4(r1) /* vcpu pointer */ + GET_SHADOW_VCPU(r4) + bl FUNC(kvmppc_copy_from_svcpu) + nop #ifdef CONFIG_PPC_BOOK3S_64 + /* Re-enable interrupts */ + ld r3, HSTATE_HOST_MSR(r13) + ori r3, r3, MSR_EE + MTMSR_EERI(r3) + /* * Reload kernel SPRG3 value. * No need to save guest value as usermode can't modify SPRG3. */ ld r3, PACA_SPRG3(r13) mtspr SPRN_SPRG3, r3 + #endif /* CONFIG_PPC_BOOK3S_64 */ + /* R7 = vcpu */ + PPC_LL r7, GPR4(r1) + PPC_STL r14, VCPU_GPR(R14)(r7) PPC_STL r15, VCPU_GPR(R15)(r7) PPC_STL r16, VCPU_GPR(R16)(r7) @@ -161,7 +181,7 @@ kvmppc_handler_highmem: /* Restore r3 (kvm_run) and r4 (vcpu) */ REST_2GPRS(3, r1) - bl FUNC(kvmppc_handle_exit) + bl FUNC(kvmppc_handle_exit_pr) /* If RESUME_GUEST, get back in the loop */ cmpwi r3, RESUME_GUEST diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c index da8b13c4b776..5a1ab1250a05 100644 --- a/arch/powerpc/kvm/book3s_mmu_hpte.c +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -28,7 +28,7 @@ #include <asm/mmu_context.h> #include <asm/hw_irq.h> -#include "trace.h" +#include "trace_pr.h" #define PTE_SIZE 12 @@ -56,6 +56,14 @@ static inline u64 kvmppc_mmu_hash_vpte_long(u64 vpage) HPTEG_HASH_BITS_VPTE_LONG); } +#ifdef CONFIG_PPC_BOOK3S_64 +static inline u64 kvmppc_mmu_hash_vpte_64k(u64 vpage) +{ + return hash_64((vpage & 0xffffffff0ULL) >> 4, + HPTEG_HASH_BITS_VPTE_64K); +} +#endif + void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { u64 index; @@ -83,6 +91,15 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte) hlist_add_head_rcu(&pte->list_vpte_long, &vcpu3s->hpte_hash_vpte_long[index]); +#ifdef CONFIG_PPC_BOOK3S_64 + /* Add to vPTE_64k list */ + index = kvmppc_mmu_hash_vpte_64k(pte->pte.vpage); + hlist_add_head_rcu(&pte->list_vpte_64k, + &vcpu3s->hpte_hash_vpte_64k[index]); +#endif + + vcpu3s->hpte_cache_count++; + spin_unlock(&vcpu3s->mmu_lock); } @@ -113,10 +130,13 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) hlist_del_init_rcu(&pte->list_pte_long); hlist_del_init_rcu(&pte->list_vpte); hlist_del_init_rcu(&pte->list_vpte_long); +#ifdef CONFIG_PPC_BOOK3S_64 + hlist_del_init_rcu(&pte->list_vpte_64k); +#endif + vcpu3s->hpte_cache_count--; spin_unlock(&vcpu3s->mmu_lock); - vcpu3s->hpte_cache_count--; call_rcu(&pte->rcu_head, free_pte_rcu); } @@ -219,6 +239,29 @@ static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp) rcu_read_unlock(); } +#ifdef CONFIG_PPC_BOOK3S_64 +/* Flush with mask 0xffffffff0 */ +static void kvmppc_mmu_pte_vflush_64k(struct kvm_vcpu *vcpu, u64 guest_vp) +{ + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); + struct hlist_head *list; + struct hpte_cache *pte; + u64 vp_mask = 0xffffffff0ULL; + + list = &vcpu3s->hpte_hash_vpte_64k[ + kvmppc_mmu_hash_vpte_64k(guest_vp)]; + + rcu_read_lock(); + + /* Check the list for matching entries and invalidate */ + hlist_for_each_entry_rcu(pte, list, list_vpte_64k) + if ((pte->pte.vpage & vp_mask) == guest_vp) + invalidate_pte(vcpu, pte); + + rcu_read_unlock(); +} +#endif + /* Flush with mask 0xffffff000 */ static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp) { @@ -249,6 +292,11 @@ void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) case 0xfffffffffULL: kvmppc_mmu_pte_vflush_short(vcpu, guest_vp); break; +#ifdef CONFIG_PPC_BOOK3S_64 + case 0xffffffff0ULL: + kvmppc_mmu_pte_vflush_64k(vcpu, guest_vp); + break; +#endif case 0xffffff000ULL: kvmppc_mmu_pte_vflush_long(vcpu, guest_vp); break; @@ -285,15 +333,19 @@ struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); struct hpte_cache *pte; - pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL); - vcpu3s->hpte_cache_count++; - if (vcpu3s->hpte_cache_count == HPTEG_CACHE_NUM) kvmppc_mmu_pte_flush_all(vcpu); + pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL); + return pte; } +void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte) +{ + kmem_cache_free(hpte_cache, pte); +} + void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu) { kvmppc_mmu_pte_flush(vcpu, 0, 0); @@ -320,6 +372,10 @@ int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu) ARRAY_SIZE(vcpu3s->hpte_hash_vpte)); kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_long, ARRAY_SIZE(vcpu3s->hpte_hash_vpte_long)); +#ifdef CONFIG_PPC_BOOK3S_64 + kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_64k, + ARRAY_SIZE(vcpu3s->hpte_hash_vpte_64k)); +#endif spin_lock_init(&vcpu3s->mmu_lock); diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 27db1e665959..df36cf2ed22b 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -40,8 +40,12 @@ #include <linux/sched.h> #include <linux/vmalloc.h> #include <linux/highmem.h> +#include <linux/module.h> -#include "trace.h" +#include "book3s.h" + +#define CREATE_TRACE_POINTS +#include "trace_pr.h" /* #define EXIT_DEBUG */ /* #define DEBUG_EXT */ @@ -56,29 +60,25 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, #define HW_PAGE_SIZE PAGE_SIZE #endif -void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) { #ifdef CONFIG_PPC_BOOK3S_64 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb)); - memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu, - sizeof(get_paca()->shadow_vcpu)); svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max; svcpu_put(svcpu); #endif vcpu->cpu = smp_processor_id(); #ifdef CONFIG_PPC_BOOK3S_32 - current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu; + current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu; #endif } -void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) { #ifdef CONFIG_PPC_BOOK3S_64 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb)); - memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu, - sizeof(get_paca()->shadow_vcpu)); to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max; svcpu_put(svcpu); #endif @@ -87,7 +87,61 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) vcpu->cpu = -1; } -int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) +/* Copy data needed by real-mode code from vcpu to shadow vcpu */ +void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, + struct kvm_vcpu *vcpu) +{ + svcpu->gpr[0] = vcpu->arch.gpr[0]; + svcpu->gpr[1] = vcpu->arch.gpr[1]; + svcpu->gpr[2] = vcpu->arch.gpr[2]; + svcpu->gpr[3] = vcpu->arch.gpr[3]; + svcpu->gpr[4] = vcpu->arch.gpr[4]; + svcpu->gpr[5] = vcpu->arch.gpr[5]; + svcpu->gpr[6] = vcpu->arch.gpr[6]; + svcpu->gpr[7] = vcpu->arch.gpr[7]; + svcpu->gpr[8] = vcpu->arch.gpr[8]; + svcpu->gpr[9] = vcpu->arch.gpr[9]; + svcpu->gpr[10] = vcpu->arch.gpr[10]; + svcpu->gpr[11] = vcpu->arch.gpr[11]; + svcpu->gpr[12] = vcpu->arch.gpr[12]; + svcpu->gpr[13] = vcpu->arch.gpr[13]; + svcpu->cr = vcpu->arch.cr; + svcpu->xer = vcpu->arch.xer; + svcpu->ctr = vcpu->arch.ctr; + svcpu->lr = vcpu->arch.lr; + svcpu->pc = vcpu->arch.pc; +} + +/* Copy data touched by real-mode code from shadow vcpu back to vcpu */ +void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, + struct kvmppc_book3s_shadow_vcpu *svcpu) +{ + vcpu->arch.gpr[0] = svcpu->gpr[0]; + vcpu->arch.gpr[1] = svcpu->gpr[1]; + vcpu->arch.gpr[2] = svcpu->gpr[2]; + vcpu->arch.gpr[3] = svcpu->gpr[3]; + vcpu->arch.gpr[4] = svcpu->gpr[4]; + vcpu->arch.gpr[5] = svcpu->gpr[5]; + vcpu->arch.gpr[6] = svcpu->gpr[6]; + vcpu->arch.gpr[7] = svcpu->gpr[7]; + vcpu->arch.gpr[8] = svcpu->gpr[8]; + vcpu->arch.gpr[9] = svcpu->gpr[9]; + vcpu->arch.gpr[10] = svcpu->gpr[10]; + vcpu->arch.gpr[11] = svcpu->gpr[11]; + vcpu->arch.gpr[12] = svcpu->gpr[12]; + vcpu->arch.gpr[13] = svcpu->gpr[13]; + vcpu->arch.cr = svcpu->cr; + vcpu->arch.xer = svcpu->xer; + vcpu->arch.ctr = svcpu->ctr; + vcpu->arch.lr = svcpu->lr; + vcpu->arch.pc = svcpu->pc; + vcpu->arch.shadow_srr1 = svcpu->shadow_srr1; + vcpu->arch.fault_dar = svcpu->fault_dar; + vcpu->arch.fault_dsisr = svcpu->fault_dsisr; + vcpu->arch.last_inst = svcpu->last_inst; +} + +static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu) { int r = 1; /* Indicate we want to get back into the guest */ @@ -100,44 +154,69 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) } /************* MMU Notifiers *************/ +static void do_kvm_unmap_hva(struct kvm *kvm, unsigned long start, + unsigned long end) +{ + long i; + struct kvm_vcpu *vcpu; + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + + slots = kvm_memslots(kvm); + kvm_for_each_memslot(memslot, slots) { + unsigned long hva_start, hva_end; + gfn_t gfn, gfn_end; + + hva_start = max(start, memslot->userspace_addr); + hva_end = min(end, memslot->userspace_addr + + (memslot->npages << PAGE_SHIFT)); + if (hva_start >= hva_end) + continue; + /* + * {gfn(page) | page intersects with [hva_start, hva_end)} = + * {gfn, gfn+1, ..., gfn_end-1}. + */ + gfn = hva_to_gfn_memslot(hva_start, memslot); + gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); + kvm_for_each_vcpu(i, vcpu, kvm) + kvmppc_mmu_pte_pflush(vcpu, gfn << PAGE_SHIFT, + gfn_end << PAGE_SHIFT); + } +} -int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +static int kvm_unmap_hva_pr(struct kvm *kvm, unsigned long hva) { trace_kvm_unmap_hva(hva); - /* - * Flush all shadow tlb entries everywhere. This is slow, but - * we are 100% sure that we catch the to be unmapped page - */ - kvm_flush_remote_tlbs(kvm); + do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE); return 0; } -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start, + unsigned long end) { - /* kvm_unmap_hva flushes everything anyways */ - kvm_unmap_hva(kvm, start); + do_kvm_unmap_hva(kvm, start, end); return 0; } -int kvm_age_hva(struct kvm *kvm, unsigned long hva) +static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva) { /* XXX could be more clever ;) */ return 0; } -int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +static int kvm_test_age_hva_pr(struct kvm *kvm, unsigned long hva) { /* XXX could be more clever ;) */ return 0; } -void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +static void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte) { /* The page will get remapped properly on its next fault */ - kvm_unmap_hva(kvm, hva); + do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE); } /*****************************************/ @@ -159,7 +238,7 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) vcpu->arch.shadow_msr = smsr; } -void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) +static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr) { ulong old_msr = vcpu->arch.shared->msr; @@ -219,7 +298,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); } -void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) +void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr) { u32 host_pvr; @@ -256,6 +335,23 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be")) to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1); + /* + * If they're asking for POWER6 or later, set the flag + * indicating that we can do multiple large page sizes + * and 1TB segments. + * Also set the flag that indicates that tlbie has the large + * page bit in the RB operand instead of the instruction. + */ + switch (PVR_VER(pvr)) { + case PVR_POWER6: + case PVR_POWER7: + case PVR_POWER7p: + case PVR_POWER8: + vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE | + BOOK3S_HFLAG_NEW_TLBIE; + break; + } + #ifdef CONFIG_PPC_BOOK3S_32 /* 32 bit Book3S always has 32 byte dcbz */ vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; @@ -334,6 +430,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, ulong eaddr, int vec) { bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE); + bool iswrite = false; int r = RESUME_GUEST; int relocated; int page_found = 0; @@ -344,10 +441,12 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, u64 vsid; relocated = data ? dr : ir; + if (data && (vcpu->arch.fault_dsisr & DSISR_ISSTORE)) + iswrite = true; /* Resolve real address if translation turned on */ if (relocated) { - page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data); + page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data, iswrite); } else { pte.may_execute = true; pte.may_read = true; @@ -355,6 +454,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, pte.raddr = eaddr & KVM_PAM; pte.eaddr = eaddr; pte.vpage = eaddr >> 12; + pte.page_size = MMU_PAGE_64K; } switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { @@ -388,22 +488,18 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, if (page_found == -ENOENT) { /* Page not found in guest PTE entries */ - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); - vcpu->arch.shared->dsisr = svcpu->fault_dsisr; + vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr; vcpu->arch.shared->msr |= - (svcpu->shadow_srr1 & 0x00000000f8000000ULL); - svcpu_put(svcpu); + vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL; kvmppc_book3s_queue_irqprio(vcpu, vec); } else if (page_found == -EPERM) { /* Storage protection */ - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); - vcpu->arch.shared->dsisr = svcpu->fault_dsisr & ~DSISR_NOHPTE; + vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE; vcpu->arch.shared->dsisr |= DSISR_PROTFAULT; vcpu->arch.shared->msr |= - svcpu->shadow_srr1 & 0x00000000f8000000ULL; - svcpu_put(svcpu); + vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL; kvmppc_book3s_queue_irqprio(vcpu, vec); } else if (page_found == -EINVAL) { /* Page not found in guest SLB */ @@ -411,12 +507,20 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); } else if (!is_mmio && kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { + if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) { + /* + * There is already a host HPTE there, presumably + * a read-only one for a page the guest thinks + * is writable, so get rid of it first. + */ + kvmppc_mmu_unmap_page(vcpu, &pte); + } /* The guest's PTE is not mapped yet. Map on the host */ - kvmppc_mmu_map_page(vcpu, &pte); + kvmppc_mmu_map_page(vcpu, &pte, iswrite); if (data) vcpu->stat.sp_storage++; else if (vcpu->arch.mmu.is_dcbz32(vcpu) && - (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) + (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) kvmppc_patch_dcbz(vcpu, &pte); } else { /* MMIO */ @@ -619,13 +723,15 @@ static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu) if (lost_ext & MSR_FP) kvmppc_load_up_fpu(); +#ifdef CONFIG_ALTIVEC if (lost_ext & MSR_VEC) kvmppc_load_up_altivec(); +#endif current->thread.regs->msr |= lost_ext; } -int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int exit_nr) +int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int exit_nr) { int r = RESUME_HOST; int s; @@ -643,25 +749,32 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, switch (exit_nr) { case BOOK3S_INTERRUPT_INST_STORAGE: { - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); - ulong shadow_srr1 = svcpu->shadow_srr1; + ulong shadow_srr1 = vcpu->arch.shadow_srr1; vcpu->stat.pf_instruc++; #ifdef CONFIG_PPC_BOOK3S_32 /* We set segments as unused segments when invalidating them. So * treat the respective fault as segment fault. */ - if (svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] == SR_INVALID) { - kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); - r = RESUME_GUEST; + { + struct kvmppc_book3s_shadow_vcpu *svcpu; + u32 sr; + + svcpu = svcpu_get(vcpu); + sr = svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT]; svcpu_put(svcpu); - break; + if (sr == SR_INVALID) { + kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); + r = RESUME_GUEST; + break; + } } #endif - svcpu_put(svcpu); /* only care about PTEG not found errors, but leave NX alone */ if (shadow_srr1 & 0x40000000) { + int idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr); + srcu_read_unlock(&vcpu->kvm->srcu, idx); vcpu->stat.sp_instruc++; } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { @@ -682,25 +795,36 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, case BOOK3S_INTERRUPT_DATA_STORAGE: { ulong dar = kvmppc_get_fault_dar(vcpu); - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); - u32 fault_dsisr = svcpu->fault_dsisr; + u32 fault_dsisr = vcpu->arch.fault_dsisr; vcpu->stat.pf_storage++; #ifdef CONFIG_PPC_BOOK3S_32 /* We set segments as unused segments when invalidating them. So * treat the respective fault as segment fault. */ - if ((svcpu->sr[dar >> SID_SHIFT]) == SR_INVALID) { - kvmppc_mmu_map_segment(vcpu, dar); - r = RESUME_GUEST; + { + struct kvmppc_book3s_shadow_vcpu *svcpu; + u32 sr; + + svcpu = svcpu_get(vcpu); + sr = svcpu->sr[dar >> SID_SHIFT]; svcpu_put(svcpu); - break; + if (sr == SR_INVALID) { + kvmppc_mmu_map_segment(vcpu, dar); + r = RESUME_GUEST; + break; + } } #endif - svcpu_put(svcpu); - /* The only case we need to handle is missing shadow PTEs */ - if (fault_dsisr & DSISR_NOHPTE) { + /* + * We need to handle missing shadow PTEs, and + * protection faults due to us mapping a page read-only + * when the guest thinks it is writable. + */ + if (fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT)) { + int idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); + srcu_read_unlock(&vcpu->kvm->srcu, idx); } else { vcpu->arch.shared->dar = dar; vcpu->arch.shared->dsisr = fault_dsisr; @@ -743,13 +867,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, case BOOK3S_INTERRUPT_H_EMUL_ASSIST: { enum emulation_result er; - struct kvmppc_book3s_shadow_vcpu *svcpu; ulong flags; program_interrupt: - svcpu = svcpu_get(vcpu); - flags = svcpu->shadow_srr1 & 0x1f0000ull; - svcpu_put(svcpu); + flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; if (vcpu->arch.shared->msr & MSR_PR) { #ifdef EXIT_DEBUG @@ -798,7 +919,7 @@ program_interrupt: ulong cmd = kvmppc_get_gpr(vcpu, 3); int i; -#ifdef CONFIG_KVM_BOOK3S_64_PR +#ifdef CONFIG_PPC_BOOK3S_64 if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) { r = RESUME_GUEST; break; @@ -881,9 +1002,7 @@ program_interrupt: break; default: { - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); - ulong shadow_srr1 = svcpu->shadow_srr1; - svcpu_put(svcpu); + ulong shadow_srr1 = vcpu->arch.shadow_srr1; /* Ugh - bork here! What did we get? */ printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", exit_nr, kvmppc_get_pc(vcpu), shadow_srr1); @@ -920,8 +1039,8 @@ program_interrupt: return r; } -int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) +static int kvm_arch_vcpu_ioctl_get_sregs_pr(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) { struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); int i; @@ -947,13 +1066,13 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, return 0; } -int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) +static int kvm_arch_vcpu_ioctl_set_sregs_pr(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) { struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); int i; - kvmppc_set_pvr(vcpu, sregs->pvr); + kvmppc_set_pvr_pr(vcpu, sregs->pvr); vcpu3s->sdr1 = sregs->u.s.sdr1; if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) { @@ -983,7 +1102,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return 0; } -int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) +static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { int r = 0; @@ -1012,7 +1132,8 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) return r; } -int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) +static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { int r = 0; @@ -1042,28 +1163,30 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) return r; } -int kvmppc_core_check_processor_compat(void) -{ - return 0; -} - -struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, + unsigned int id) { struct kvmppc_vcpu_book3s *vcpu_book3s; struct kvm_vcpu *vcpu; int err = -ENOMEM; unsigned long p; - vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s)); - if (!vcpu_book3s) + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + if (!vcpu) goto out; - vcpu_book3s->shadow_vcpu = - kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL); - if (!vcpu_book3s->shadow_vcpu) + vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s)); + if (!vcpu_book3s) goto free_vcpu; + vcpu->arch.book3s = vcpu_book3s; + +#ifdef CONFIG_KVM_BOOK3S_32 + vcpu->arch.shadow_vcpu = + kzalloc(sizeof(*vcpu->arch.shadow_vcpu), GFP_KERNEL); + if (!vcpu->arch.shadow_vcpu) + goto free_vcpu3s; +#endif - vcpu = &vcpu_book3s->vcpu; err = kvm_vcpu_init(vcpu, kvm, id); if (err) goto free_shadow_vcpu; @@ -1076,13 +1199,19 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096); #ifdef CONFIG_PPC_BOOK3S_64 - /* default to book3s_64 (970fx) */ + /* + * Default to the same as the host if we're on sufficiently + * recent machine that we have 1TB segments; + * otherwise default to PPC970FX. + */ vcpu->arch.pvr = 0x3C0301; + if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) + vcpu->arch.pvr = mfspr(SPRN_PVR); #else /* default to book3s_32 (750) */ vcpu->arch.pvr = 0x84202; #endif - kvmppc_set_pvr(vcpu, vcpu->arch.pvr); + kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr); vcpu->arch.slb_nr = 64; vcpu->arch.shadow_msr = MSR_USER64; @@ -1096,24 +1225,31 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) uninit_vcpu: kvm_vcpu_uninit(vcpu); free_shadow_vcpu: - kfree(vcpu_book3s->shadow_vcpu); -free_vcpu: +#ifdef CONFIG_KVM_BOOK3S_32 + kfree(vcpu->arch.shadow_vcpu); +free_vcpu3s: +#endif vfree(vcpu_book3s); +free_vcpu: + kmem_cache_free(kvm_vcpu_cache, vcpu); out: return ERR_PTR(err); } -void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); free_page((unsigned long)vcpu->arch.shared & PAGE_MASK); kvm_vcpu_uninit(vcpu); - kfree(vcpu_book3s->shadow_vcpu); +#ifdef CONFIG_KVM_BOOK3S_32 + kfree(vcpu->arch.shadow_vcpu); +#endif vfree(vcpu_book3s); + kmem_cache_free(kvm_vcpu_cache, vcpu); } -int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { int ret; double fpr[32][TS_FPRWIDTH]; @@ -1222,8 +1358,8 @@ out: /* * Get (and clear) the dirty memory log for a memory slot. */ -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, - struct kvm_dirty_log *log) +static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm, + struct kvm_dirty_log *log) { struct kvm_memory_slot *memslot; struct kvm_vcpu *vcpu; @@ -1258,67 +1394,100 @@ out: return r; } -#ifdef CONFIG_PPC64 -int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) +static void kvmppc_core_flush_memslot_pr(struct kvm *kvm, + struct kvm_memory_slot *memslot) { - info->flags = KVM_PPC_1T_SEGMENTS; - - /* SLB is always 64 entries */ - info->slb_size = 64; - - /* Standard 4k base page size segment */ - info->sps[0].page_shift = 12; - info->sps[0].slb_enc = 0; - info->sps[0].enc[0].page_shift = 12; - info->sps[0].enc[0].pte_enc = 0; - - /* Standard 16M large page size segment */ - info->sps[1].page_shift = 24; - info->sps[1].slb_enc = SLB_VSID_L; - info->sps[1].enc[0].page_shift = 24; - info->sps[1].enc[0].pte_enc = 0; + return; +} +static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm, + struct kvm_memory_slot *memslot, + struct kvm_userspace_memory_region *mem) +{ return 0; } -#endif /* CONFIG_PPC64 */ -void kvmppc_core_free_memslot(struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) +static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old) { + return; } -int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, - unsigned long npages) +static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) { - return 0; + return; } -int kvmppc_core_prepare_memory_region(struct kvm *kvm, - struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem) +static int kvmppc_core_create_memslot_pr(struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } -void kvmppc_core_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old) + +#ifdef CONFIG_PPC64 +static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm, + struct kvm_ppc_smmu_info *info) { -} + long int i; + struct kvm_vcpu *vcpu; + + info->flags = 0; + + /* SLB is always 64 entries */ + info->slb_size = 64; + + /* Standard 4k base page size segment */ + info->sps[0].page_shift = 12; + info->sps[0].slb_enc = 0; + info->sps[0].enc[0].page_shift = 12; + info->sps[0].enc[0].pte_enc = 0; + + /* + * 64k large page size. + * We only want to put this in if the CPUs we're emulating + * support it, but unfortunately we don't have a vcpu easily + * to hand here to test. Just pick the first vcpu, and if + * that doesn't exist yet, report the minimum capability, + * i.e., no 64k pages. + * 1T segment support goes along with 64k pages. + */ + i = 1; + vcpu = kvm_get_vcpu(kvm, 0); + if (vcpu && (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) { + info->flags = KVM_PPC_1T_SEGMENTS; + info->sps[i].page_shift = 16; + info->sps[i].slb_enc = SLB_VSID_L | SLB_VSID_LP_01; + info->sps[i].enc[0].page_shift = 16; + info->sps[i].enc[0].pte_enc = 1; + ++i; + } + + /* Standard 16M large page size segment */ + info->sps[i].page_shift = 24; + info->sps[i].slb_enc = SLB_VSID_L; + info->sps[i].enc[0].page_shift = 24; + info->sps[i].enc[0].pte_enc = 0; -void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) + return 0; +} +#else +static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm, + struct kvm_ppc_smmu_info *info) { + /* We should not get called */ + BUG(); } +#endif /* CONFIG_PPC64 */ static unsigned int kvm_global_user_count = 0; static DEFINE_SPINLOCK(kvm_global_user_count_lock); -int kvmppc_core_init_vm(struct kvm *kvm) +static int kvmppc_core_init_vm_pr(struct kvm *kvm) { -#ifdef CONFIG_PPC64 - INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); - INIT_LIST_HEAD(&kvm->arch.rtas_tokens); -#endif + mutex_init(&kvm->arch.hpt_mutex); if (firmware_has_feature(FW_FEATURE_SET_MODE)) { spin_lock(&kvm_global_user_count_lock); @@ -1329,7 +1498,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) return 0; } -void kvmppc_core_destroy_vm(struct kvm *kvm) +static void kvmppc_core_destroy_vm_pr(struct kvm *kvm) { #ifdef CONFIG_PPC64 WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); @@ -1344,26 +1513,81 @@ void kvmppc_core_destroy_vm(struct kvm *kvm) } } -static int kvmppc_book3s_init(void) +static int kvmppc_core_check_processor_compat_pr(void) { - int r; + /* we are always compatible */ + return 0; +} - r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0, - THIS_MODULE); +static long kvm_arch_vm_ioctl_pr(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -ENOTTY; +} - if (r) +static struct kvmppc_ops kvm_ops_pr = { + .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr, + .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr, + .get_one_reg = kvmppc_get_one_reg_pr, + .set_one_reg = kvmppc_set_one_reg_pr, + .vcpu_load = kvmppc_core_vcpu_load_pr, + .vcpu_put = kvmppc_core_vcpu_put_pr, + .set_msr = kvmppc_set_msr_pr, + .vcpu_run = kvmppc_vcpu_run_pr, + .vcpu_create = kvmppc_core_vcpu_create_pr, + .vcpu_free = kvmppc_core_vcpu_free_pr, + .check_requests = kvmppc_core_check_requests_pr, + .get_dirty_log = kvm_vm_ioctl_get_dirty_log_pr, + .flush_memslot = kvmppc_core_flush_memslot_pr, + .prepare_memory_region = kvmppc_core_prepare_memory_region_pr, + .commit_memory_region = kvmppc_core_commit_memory_region_pr, + .unmap_hva = kvm_unmap_hva_pr, + .unmap_hva_range = kvm_unmap_hva_range_pr, + .age_hva = kvm_age_hva_pr, + .test_age_hva = kvm_test_age_hva_pr, + .set_spte_hva = kvm_set_spte_hva_pr, + .mmu_destroy = kvmppc_mmu_destroy_pr, + .free_memslot = kvmppc_core_free_memslot_pr, + .create_memslot = kvmppc_core_create_memslot_pr, + .init_vm = kvmppc_core_init_vm_pr, + .destroy_vm = kvmppc_core_destroy_vm_pr, + .get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr, + .emulate_op = kvmppc_core_emulate_op_pr, + .emulate_mtspr = kvmppc_core_emulate_mtspr_pr, + .emulate_mfspr = kvmppc_core_emulate_mfspr_pr, + .fast_vcpu_kick = kvm_vcpu_kick, + .arch_vm_ioctl = kvm_arch_vm_ioctl_pr, +}; + + +int kvmppc_book3s_init_pr(void) +{ + int r; + + r = kvmppc_core_check_processor_compat_pr(); + if (r < 0) return r; - r = kvmppc_mmu_hpte_sysinit(); + kvm_ops_pr.owner = THIS_MODULE; + kvmppc_pr_ops = &kvm_ops_pr; + r = kvmppc_mmu_hpte_sysinit(); return r; } -static void kvmppc_book3s_exit(void) +void kvmppc_book3s_exit_pr(void) { + kvmppc_pr_ops = NULL; kvmppc_mmu_hpte_sysexit(); - kvm_exit(); } -module_init(kvmppc_book3s_init); -module_exit(kvmppc_book3s_exit); +/* + * We only support separate modules for book3s 64 + */ +#ifdef CONFIG_PPC_BOOK3S_64 + +module_init(kvmppc_book3s_init_pr); +module_exit(kvmppc_book3s_exit_pr); + +MODULE_LICENSE("GPL"); +#endif diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index da0e0bc268bd..5efa97b993d8 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -21,6 +21,8 @@ #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> +#define HPTE_SIZE 16 /* bytes per HPT entry */ + static unsigned long get_pteg_addr(struct kvm_vcpu *vcpu, long pte_index) { struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); @@ -40,32 +42,41 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu) long pte_index = kvmppc_get_gpr(vcpu, 5); unsigned long pteg[2 * 8]; unsigned long pteg_addr, i, *hpte; + long int ret; + i = pte_index & 7; pte_index &= ~7UL; pteg_addr = get_pteg_addr(vcpu, pte_index); + mutex_lock(&vcpu->kvm->arch.hpt_mutex); copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg)); hpte = pteg; + ret = H_PTEG_FULL; if (likely((flags & H_EXACT) == 0)) { - pte_index &= ~7UL; for (i = 0; ; ++i) { if (i == 8) - return H_PTEG_FULL; + goto done; if ((*hpte & HPTE_V_VALID) == 0) break; hpte += 2; } } else { - i = kvmppc_get_gpr(vcpu, 5) & 7UL; hpte += i * 2; + if (*hpte & HPTE_V_VALID) + goto done; } hpte[0] = kvmppc_get_gpr(vcpu, 6); hpte[1] = kvmppc_get_gpr(vcpu, 7); - copy_to_user((void __user *)pteg_addr, pteg, sizeof(pteg)); - kvmppc_set_gpr(vcpu, 3, H_SUCCESS); + pteg_addr += i * HPTE_SIZE; + copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE); kvmppc_set_gpr(vcpu, 4, pte_index | i); + ret = H_SUCCESS; + + done: + mutex_unlock(&vcpu->kvm->arch.hpt_mutex); + kvmppc_set_gpr(vcpu, 3, ret); return EMULATE_DONE; } @@ -77,26 +88,31 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu) unsigned long avpn = kvmppc_get_gpr(vcpu, 6); unsigned long v = 0, pteg, rb; unsigned long pte[2]; + long int ret; pteg = get_pteg_addr(vcpu, pte_index); + mutex_lock(&vcpu->kvm->arch.hpt_mutex); copy_from_user(pte, (void __user *)pteg, sizeof(pte)); + ret = H_NOT_FOUND; if ((pte[0] & HPTE_V_VALID) == 0 || ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn) || - ((flags & H_ANDCOND) && (pte[0] & avpn) != 0)) { - kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND); - return EMULATE_DONE; - } + ((flags & H_ANDCOND) && (pte[0] & avpn) != 0)) + goto done; copy_to_user((void __user *)pteg, &v, sizeof(v)); rb = compute_tlbie_rb(pte[0], pte[1], pte_index); vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); - kvmppc_set_gpr(vcpu, 3, H_SUCCESS); + ret = H_SUCCESS; kvmppc_set_gpr(vcpu, 4, pte[0]); kvmppc_set_gpr(vcpu, 5, pte[1]); + done: + mutex_unlock(&vcpu->kvm->arch.hpt_mutex); + kvmppc_set_gpr(vcpu, 3, ret); + return EMULATE_DONE; } @@ -124,6 +140,7 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu) int paramnr = 4; int ret = H_SUCCESS; + mutex_lock(&vcpu->kvm->arch.hpt_mutex); for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) { unsigned long tsh = kvmppc_get_gpr(vcpu, paramnr+(2*i)); unsigned long tsl = kvmppc_get_gpr(vcpu, paramnr+(2*i)+1); @@ -172,6 +189,7 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu) } kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh); } + mutex_unlock(&vcpu->kvm->arch.hpt_mutex); kvmppc_set_gpr(vcpu, 3, ret); return EMULATE_DONE; @@ -184,15 +202,16 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) unsigned long avpn = kvmppc_get_gpr(vcpu, 6); unsigned long rb, pteg, r, v; unsigned long pte[2]; + long int ret; pteg = get_pteg_addr(vcpu, pte_index); + mutex_lock(&vcpu->kvm->arch.hpt_mutex); copy_from_user(pte, (void __user *)pteg, sizeof(pte)); + ret = H_NOT_FOUND; if ((pte[0] & HPTE_V_VALID) == 0 || - ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn)) { - kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND); - return EMULATE_DONE; - } + ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn)) + goto done; v = pte[0]; r = pte[1]; @@ -207,8 +226,11 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) rb = compute_tlbie_rb(v, r, pte_index); vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); copy_to_user((void __user *)pteg, pte, sizeof(pte)); + ret = H_SUCCESS; - kvmppc_set_gpr(vcpu, 3, H_SUCCESS); + done: + mutex_unlock(&vcpu->kvm->arch.hpt_mutex); + kvmppc_set_gpr(vcpu, 3, ret); return EMULATE_DONE; } diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 8f7633e3afb8..a38c4c9edab8 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -38,32 +38,6 @@ #define FUNC(name) GLUE(.,name) - .globl kvmppc_skip_interrupt -kvmppc_skip_interrupt: - /* - * Here all GPRs are unchanged from when the interrupt happened - * except for r13, which is saved in SPRG_SCRATCH0. - */ - mfspr r13, SPRN_SRR0 - addi r13, r13, 4 - mtspr SPRN_SRR0, r13 - GET_SCRATCH0(r13) - rfid - b . - - .globl kvmppc_skip_Hinterrupt -kvmppc_skip_Hinterrupt: - /* - * Here all GPRs are unchanged from when the interrupt happened - * except for r13, which is saved in SPRG_SCRATCH0. - */ - mfspr r13, SPRN_HSRR0 - addi r13, r13, 4 - mtspr SPRN_HSRR0, r13 - GET_SCRATCH0(r13) - hrfid - b . - #elif defined(CONFIG_PPC_BOOK3S_32) #define FUNC(name) name @@ -179,11 +153,15 @@ _GLOBAL(kvmppc_entry_trampoline) li r6, MSR_IR | MSR_DR andc r6, r5, r6 /* Clear DR and IR in MSR value */ +#ifdef CONFIG_PPC_BOOK3S_32 /* * Set EE in HOST_MSR so that it's enabled when we get into our - * C exit handler function + * C exit handler function. On 64-bit we delay enabling + * interrupts until we have finished transferring stuff + * to or from the PACA. */ ori r5, r5, MSR_EE +#endif mtsrr0 r7 mtsrr1 r6 RFI diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index 3219ba895246..cf95cdef73c9 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -260,6 +260,7 @@ fail: */ return rc; } +EXPORT_SYMBOL_GPL(kvmppc_rtas_hcall); void kvmppc_rtas_tokens_free(struct kvm *kvm) { diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 1abe4788191a..bc50c97751d3 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -161,8 +161,8 @@ kvmppc_handler_trampoline_enter_end: .global kvmppc_handler_trampoline_exit kvmppc_handler_trampoline_exit: -.global kvmppc_interrupt -kvmppc_interrupt: +.global kvmppc_interrupt_pr +kvmppc_interrupt_pr: /* Register usage at this point: * diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index a3a5cb8ee7ea..02a17dcf1610 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -818,7 +818,7 @@ int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req) } /* Check for real mode returning too hard */ - if (xics->real_mode) + if (xics->real_mode && is_kvmppc_hv_enabled(vcpu->kvm)) return kvmppc_xics_rm_complete(vcpu, req); switch (req) { @@ -840,6 +840,7 @@ int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req) return rc; } +EXPORT_SYMBOL_GPL(kvmppc_xics_hcall); /* -- Initialisation code etc. -- */ @@ -1250,13 +1251,13 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type) xics_debugfs_init(xics); -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE if (cpu_has_feature(CPU_FTR_ARCH_206)) { /* Enable real mode support */ xics->real_mode = ENABLE_REALMODE; xics->real_mode_dbg = DEBUG_REALMODE; } -#endif /* CONFIG_KVM_BOOK3S_64_HV */ +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ return 0; } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 17722d82f1d1..15d0149511eb 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -40,7 +40,9 @@ #include "timing.h" #include "booke.h" -#include "trace.h" + +#define CREATE_TRACE_POINTS +#include "trace_booke.h" unsigned long kvmppc_booke_handlers; @@ -133,6 +135,29 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu) #endif } +static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) +{ + /* Synchronize guest's desire to get debug interrupts into shadow MSR */ +#ifndef CONFIG_KVM_BOOKE_HV + vcpu->arch.shadow_msr &= ~MSR_DE; + vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE; +#endif + + /* Force enable debug interrupts when user space wants to debug */ + if (vcpu->guest_debug) { +#ifdef CONFIG_KVM_BOOKE_HV + /* + * Since there is no shadow MSR, sync MSR_DE into the guest + * visible MSR. + */ + vcpu->arch.shared->msr |= MSR_DE; +#else + vcpu->arch.shadow_msr |= MSR_DE; + vcpu->arch.shared->msr &= ~MSR_DE; +#endif + } +} + /* * Helper function for "full" MSR writes. No need to call this if only * EE/CE/ME/DE/RI are changing. @@ -150,6 +175,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) kvmppc_mmu_msr_notify(vcpu, old_msr); kvmppc_vcpu_sync_spe(vcpu); kvmppc_vcpu_sync_fpu(vcpu); + kvmppc_vcpu_sync_debug(vcpu); } static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, @@ -655,6 +681,7 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { int ret, s; + struct thread_struct thread; #ifdef CONFIG_PPC_FPU unsigned int fpscr; int fpexc_mode; @@ -696,6 +723,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvmppc_load_guest_fp(vcpu); #endif + /* Switch to guest debug context */ + thread.debug = vcpu->arch.shadow_dbg_reg; + switch_booke_debug_regs(&thread); + thread.debug = current->thread.debug; + current->thread.debug = vcpu->arch.shadow_dbg_reg; + kvmppc_fix_ee_before_entry(); ret = __kvmppc_vcpu_run(kvm_run, vcpu); @@ -703,6 +736,10 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) /* No need for kvm_guest_exit. It's done in handle_exit. We also get here with interrupts enabled. */ + /* Switch back to user space debug context */ + switch_booke_debug_regs(&thread); + current->thread.debug = thread.debug; + #ifdef CONFIG_PPC_FPU kvmppc_save_guest_fp(vcpu); @@ -758,6 +795,30 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) } } +static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg); + u32 dbsr = vcpu->arch.dbsr; + + run->debug.arch.status = 0; + run->debug.arch.address = vcpu->arch.pc; + + if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) { + run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT; + } else { + if (dbsr & (DBSR_DAC1W | DBSR_DAC2W)) + run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE; + else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R)) + run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ; + if (dbsr & (DBSR_DAC1R | DBSR_DAC1W)) + run->debug.arch.address = dbg_reg->dac1; + else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W)) + run->debug.arch.address = dbg_reg->dac2; + } + + return RESUME_HOST; +} + static void kvmppc_fill_pt_regs(struct pt_regs *regs) { ulong r1, ip, msr, lr; @@ -818,6 +879,11 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu, case BOOKE_INTERRUPT_CRITICAL: unknown_exception(®s); break; + case BOOKE_INTERRUPT_DEBUG: + /* Save DBSR before preemption is enabled */ + vcpu->arch.dbsr = mfspr(SPRN_DBSR); + kvmppc_clear_dbsr(); + break; } } @@ -1135,18 +1201,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, } case BOOKE_INTERRUPT_DEBUG: { - u32 dbsr; - - vcpu->arch.pc = mfspr(SPRN_CSRR0); - - /* clear IAC events in DBSR register */ - dbsr = mfspr(SPRN_DBSR); - dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4; - mtspr(SPRN_DBSR, dbsr); - - run->exit_reason = KVM_EXIT_DEBUG; + r = kvmppc_handle_debug(run, vcpu); + if (r == RESUME_HOST) + run->exit_reason = KVM_EXIT_DEBUG; kvmppc_account_exit(vcpu, DEBUG_EXITS); - r = RESUME_HOST; break; } @@ -1197,7 +1255,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) kvmppc_set_msr(vcpu, 0); #ifndef CONFIG_KVM_BOOKE_HV - vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS; + vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS; vcpu->arch.shadow_pid = 1; vcpu->arch.shared->msr = 0; #endif @@ -1359,7 +1417,7 @@ static int set_sregs_arch206(struct kvm_vcpu *vcpu, return 0; } -void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +int kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { sregs->u.e.features |= KVM_SREGS_E_IVOR; @@ -1379,6 +1437,7 @@ void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; + return 0; } int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) @@ -1413,8 +1472,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, get_sregs_base(vcpu, sregs); get_sregs_arch206(vcpu, sregs); - kvmppc_core_get_sregs(vcpu, sregs); - return 0; + return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs); } int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, @@ -1433,7 +1491,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, if (ret < 0) return ret; - return kvmppc_core_set_sregs(vcpu, sregs); + return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); } int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) @@ -1441,7 +1499,6 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) int r = 0; union kvmppc_one_reg val; int size; - long int i; size = one_reg_size(reg->id); if (size > sizeof(val)) @@ -1449,16 +1506,24 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) switch (reg->id) { case KVM_REG_PPC_IAC1: + val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac1); + break; case KVM_REG_PPC_IAC2: + val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac2); + break; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 case KVM_REG_PPC_IAC3: + val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac3); + break; case KVM_REG_PPC_IAC4: - i = reg->id - KVM_REG_PPC_IAC1; - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac[i]); + val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac4); break; +#endif case KVM_REG_PPC_DAC1: + val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac1); + break; case KVM_REG_PPC_DAC2: - i = reg->id - KVM_REG_PPC_DAC1; - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac[i]); + val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2); break; case KVM_REG_PPC_EPR: { u32 epr = get_guest_epr(vcpu); @@ -1477,10 +1542,13 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) val = get_reg_val(reg->id, vcpu->arch.tsr); break; case KVM_REG_PPC_DEBUG_INST: - val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV); + val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV_DEBUG); + break; + case KVM_REG_PPC_VRSAVE: + val = get_reg_val(reg->id, vcpu->arch.vrsave); break; default: - r = kvmppc_get_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val); break; } @@ -1498,7 +1566,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) int r = 0; union kvmppc_one_reg val; int size; - long int i; size = one_reg_size(reg->id); if (size > sizeof(val)) @@ -1509,16 +1576,24 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) switch (reg->id) { case KVM_REG_PPC_IAC1: + vcpu->arch.dbg_reg.iac1 = set_reg_val(reg->id, val); + break; case KVM_REG_PPC_IAC2: + vcpu->arch.dbg_reg.iac2 = set_reg_val(reg->id, val); + break; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 case KVM_REG_PPC_IAC3: + vcpu->arch.dbg_reg.iac3 = set_reg_val(reg->id, val); + break; case KVM_REG_PPC_IAC4: - i = reg->id - KVM_REG_PPC_IAC1; - vcpu->arch.dbg_reg.iac[i] = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.iac4 = set_reg_val(reg->id, val); break; +#endif case KVM_REG_PPC_DAC1: + vcpu->arch.dbg_reg.dac1 = set_reg_val(reg->id, val); + break; case KVM_REG_PPC_DAC2: - i = reg->id - KVM_REG_PPC_DAC1; - vcpu->arch.dbg_reg.dac[i] = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.dac2 = set_reg_val(reg->id, val); break; case KVM_REG_PPC_EPR: { u32 new_epr = set_reg_val(reg->id, val); @@ -1552,20 +1627,17 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) kvmppc_set_tcr(vcpu, tcr); break; } + case KVM_REG_PPC_VRSAVE: + vcpu->arch.vrsave = set_reg_val(reg->id, val); + break; default: - r = kvmppc_set_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val); break; } return r; } -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, - struct kvm_guest_debug *dbg) -{ - return -EINVAL; -} - int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { return -ENOTSUPP; @@ -1590,12 +1662,12 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) return -ENOTSUPP; } -void kvmppc_core_free_memslot(struct kvm_memory_slot *free, +void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, +int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { return 0; @@ -1671,6 +1743,157 @@ void kvmppc_decrementer_func(unsigned long data) kvmppc_set_tsr_bits(vcpu, TSR_DIS); } +static int kvmppc_booke_add_breakpoint(struct debug_reg *dbg_reg, + uint64_t addr, int index) +{ + switch (index) { + case 0: + dbg_reg->dbcr0 |= DBCR0_IAC1; + dbg_reg->iac1 = addr; + break; + case 1: + dbg_reg->dbcr0 |= DBCR0_IAC2; + dbg_reg->iac2 = addr; + break; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + case 2: + dbg_reg->dbcr0 |= DBCR0_IAC3; + dbg_reg->iac3 = addr; + break; + case 3: + dbg_reg->dbcr0 |= DBCR0_IAC4; + dbg_reg->iac4 = addr; + break; +#endif + default: + return -EINVAL; + } + + dbg_reg->dbcr0 |= DBCR0_IDM; + return 0; +} + +static int kvmppc_booke_add_watchpoint(struct debug_reg *dbg_reg, uint64_t addr, + int type, int index) +{ + switch (index) { + case 0: + if (type & KVMPPC_DEBUG_WATCH_READ) + dbg_reg->dbcr0 |= DBCR0_DAC1R; + if (type & KVMPPC_DEBUG_WATCH_WRITE) + dbg_reg->dbcr0 |= DBCR0_DAC1W; + dbg_reg->dac1 = addr; + break; + case 1: + if (type & KVMPPC_DEBUG_WATCH_READ) + dbg_reg->dbcr0 |= DBCR0_DAC2R; + if (type & KVMPPC_DEBUG_WATCH_WRITE) + dbg_reg->dbcr0 |= DBCR0_DAC2W; + dbg_reg->dac2 = addr; + break; + default: + return -EINVAL; + } + + dbg_reg->dbcr0 |= DBCR0_IDM; + return 0; +} +void kvm_guest_protect_msr(struct kvm_vcpu *vcpu, ulong prot_bitmap, bool set) +{ + /* XXX: Add similar MSR protection for BookE-PR */ +#ifdef CONFIG_KVM_BOOKE_HV + BUG_ON(prot_bitmap & ~(MSRP_UCLEP | MSRP_DEP | MSRP_PMMP)); + if (set) { + if (prot_bitmap & MSR_UCLE) + vcpu->arch.shadow_msrp |= MSRP_UCLEP; + if (prot_bitmap & MSR_DE) + vcpu->arch.shadow_msrp |= MSRP_DEP; + if (prot_bitmap & MSR_PMM) + vcpu->arch.shadow_msrp |= MSRP_PMMP; + } else { + if (prot_bitmap & MSR_UCLE) + vcpu->arch.shadow_msrp &= ~MSRP_UCLEP; + if (prot_bitmap & MSR_DE) + vcpu->arch.shadow_msrp &= ~MSRP_DEP; + if (prot_bitmap & MSR_PMM) + vcpu->arch.shadow_msrp &= ~MSRP_PMMP; + } +#endif +} + +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) +{ + struct debug_reg *dbg_reg; + int n, b = 0, w = 0; + + if (!(dbg->control & KVM_GUESTDBG_ENABLE)) { + vcpu->arch.shadow_dbg_reg.dbcr0 = 0; + vcpu->guest_debug = 0; + kvm_guest_protect_msr(vcpu, MSR_DE, false); + return 0; + } + + kvm_guest_protect_msr(vcpu, MSR_DE, true); + vcpu->guest_debug = dbg->control; + vcpu->arch.shadow_dbg_reg.dbcr0 = 0; + /* Set DBCR0_EDM in guest visible DBCR0 register. */ + vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM; + + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC; + + /* Code below handles only HW breakpoints */ + dbg_reg = &(vcpu->arch.shadow_dbg_reg); + +#ifdef CONFIG_KVM_BOOKE_HV + /* + * On BookE-HV (e500mc) the guest is always executed with MSR.GS=1 + * DBCR1 and DBCR2 are set to trigger debug events when MSR.PR is 0 + */ + dbg_reg->dbcr1 = 0; + dbg_reg->dbcr2 = 0; +#else + /* + * On BookE-PR (e500v2) the guest is always executed with MSR.PR=1 + * We set DBCR1 and DBCR2 to only trigger debug events when MSR.PR + * is set. + */ + dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US | + DBCR1_IAC4US; + dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; +#endif + + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) + return 0; + + for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) { + uint64_t addr = dbg->arch.bp[n].addr; + uint32_t type = dbg->arch.bp[n].type; + + if (type == KVMPPC_DEBUG_NONE) + continue; + + if (type & !(KVMPPC_DEBUG_WATCH_READ | + KVMPPC_DEBUG_WATCH_WRITE | + KVMPPC_DEBUG_BREAKPOINT)) + return -EINVAL; + + if (type & KVMPPC_DEBUG_BREAKPOINT) { + /* Setting H/W breakpoint */ + if (kvmppc_booke_add_breakpoint(dbg_reg, addr, b++)) + return -EINVAL; + } else { + /* Setting H/W watchpoint */ + if (kvmppc_booke_add_watchpoint(dbg_reg, addr, + type, w++)) + return -EINVAL; + } + } + + return 0; +} + void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { vcpu->cpu = smp_processor_id(); @@ -1681,6 +1904,44 @@ void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu) { current->thread.kvm_vcpu = NULL; vcpu->cpu = -1; + + /* Clear pending debug event in DBSR */ + kvmppc_clear_dbsr(); +} + +void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) +{ + vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu); +} + +int kvmppc_core_init_vm(struct kvm *kvm) +{ + return kvm->arch.kvm_ops->init_vm(kvm); +} + +struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +{ + return kvm->arch.kvm_ops->vcpu_create(kvm, id); +} + +void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +{ + vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu); +} + +void kvmppc_core_destroy_vm(struct kvm *kvm) +{ + kvm->arch.kvm_ops->destroy_vm(kvm); +} + +void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu); +} + +void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +{ + vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu); } int __init kvmppc_booke_init(void) diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index 5fd1ba693579..09bfd9bc7cf8 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -99,6 +99,30 @@ enum int_class { void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type); +extern void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu); +extern int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance); +extern int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, + ulong spr_val); +extern int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, + ulong *spr_val); +extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu); +extern int kvmppc_core_emulate_op_e500(struct kvm_run *run, + struct kvm_vcpu *vcpu, + unsigned int inst, int *advance); +extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, + ulong spr_val); +extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, + ulong *spr_val); +extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu); +extern int kvmppc_core_emulate_op_e500(struct kvm_run *run, + struct kvm_vcpu *vcpu, + unsigned int inst, int *advance); +extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, + ulong spr_val); +extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, + ulong *spr_val); + /* * Load up guest vcpu FP state if it's needed. * It also set the MSR_FP in thread so that host know @@ -129,4 +153,9 @@ static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu) giveup_fpu(current); #endif } + +static inline void kvmppc_clear_dbsr(void) +{ + mtspr(SPRN_DBSR, mfspr(SPRN_DBSR)); +} #endif /* __KVM_BOOKE_H__ */ diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index ce6b73c29612..497b142f651c 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -305,7 +305,7 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) { } -void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu) { kvmppc_booke_vcpu_load(vcpu, cpu); @@ -313,7 +313,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvmppc_e500_recalc_shadow_pid(to_e500(vcpu)); } -void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +static void kvmppc_core_vcpu_put_e500(struct kvm_vcpu *vcpu) { #ifdef CONFIG_SPE if (vcpu->arch.shadow_msr & MSR_SPE) @@ -367,7 +367,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) return 0; } -void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +static int kvmppc_core_get_sregs_e500(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); @@ -388,9 +389,11 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) kvmppc_get_sregs_ivor(vcpu, sregs); kvmppc_get_sregs_e500_tlb(vcpu, sregs); + return 0; } -int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +static int kvmppc_core_set_sregs_e500(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); int ret; @@ -425,21 +428,22 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) return kvmppc_set_sregs_ivor(vcpu, sregs); } -int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, - union kvmppc_one_reg *val) +static int kvmppc_get_one_reg_e500(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); return r; } -int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, - union kvmppc_one_reg *val) +static int kvmppc_set_one_reg_e500(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); return r; } -struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm, + unsigned int id) { struct kvmppc_vcpu_e500 *vcpu_e500; struct kvm_vcpu *vcpu; @@ -481,7 +485,7 @@ out: return ERR_PTR(err); } -void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +static void kvmppc_core_vcpu_free_e500(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); @@ -492,15 +496,32 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) kmem_cache_free(kvm_vcpu_cache, vcpu_e500); } -int kvmppc_core_init_vm(struct kvm *kvm) +static int kvmppc_core_init_vm_e500(struct kvm *kvm) { return 0; } -void kvmppc_core_destroy_vm(struct kvm *kvm) +static void kvmppc_core_destroy_vm_e500(struct kvm *kvm) { } +static struct kvmppc_ops kvm_ops_e500 = { + .get_sregs = kvmppc_core_get_sregs_e500, + .set_sregs = kvmppc_core_set_sregs_e500, + .get_one_reg = kvmppc_get_one_reg_e500, + .set_one_reg = kvmppc_set_one_reg_e500, + .vcpu_load = kvmppc_core_vcpu_load_e500, + .vcpu_put = kvmppc_core_vcpu_put_e500, + .vcpu_create = kvmppc_core_vcpu_create_e500, + .vcpu_free = kvmppc_core_vcpu_free_e500, + .mmu_destroy = kvmppc_mmu_destroy_e500, + .init_vm = kvmppc_core_init_vm_e500, + .destroy_vm = kvmppc_core_destroy_vm_e500, + .emulate_op = kvmppc_core_emulate_op_e500, + .emulate_mtspr = kvmppc_core_emulate_mtspr_e500, + .emulate_mfspr = kvmppc_core_emulate_mfspr_e500, +}; + static int __init kvmppc_e500_init(void) { int r, i; @@ -512,11 +533,11 @@ static int __init kvmppc_e500_init(void) r = kvmppc_core_check_processor_compat(); if (r) - return r; + goto err_out; r = kvmppc_booke_init(); if (r) - return r; + goto err_out; /* copy extra E500 exception handlers */ ivor[0] = mfspr(SPRN_IVOR32); @@ -534,11 +555,19 @@ static int __init kvmppc_e500_init(void) flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers + ivor[max_ivor] + handler_len); - return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); + r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); + if (r) + goto err_out; + kvm_ops_e500.owner = THIS_MODULE; + kvmppc_pr_ops = &kvm_ops_e500; + +err_out: + return r; } static void __exit kvmppc_e500_exit(void) { + kvmppc_pr_ops = NULL; kvmppc_booke_exit(); } diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index c2e5e98453a6..4fd9650eb018 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -117,7 +117,7 @@ static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu) #define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW) #define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW) #define MAS2_ATTRIB_MASK \ - (MAS2_X0 | MAS2_X1) + (MAS2_X0 | MAS2_X1 | MAS2_E | MAS2_G) #define MAS3_ATTRIB_MASK \ (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \ | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK) diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index b10a01243abd..89b7f821f6c4 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -26,6 +26,7 @@ #define XOP_TLBRE 946 #define XOP_TLBWE 978 #define XOP_TLBILX 18 +#define XOP_EHPRIV 270 #ifdef CONFIG_KVM_E500MC static int dbell2prio(ulong param) @@ -82,8 +83,28 @@ static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb) } #endif -int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int inst, int *advance) +static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance) +{ + int emulated = EMULATE_DONE; + + switch (get_oc(inst)) { + case EHPRIV_OC_DEBUG: + run->exit_reason = KVM_EXIT_DEBUG; + run->debug.arch.address = vcpu->arch.pc; + run->debug.arch.status = 0; + kvmppc_account_exit(vcpu, DEBUG_EXITS); + emulated = EMULATE_EXIT_USER; + *advance = 0; + break; + default: + emulated = EMULATE_FAIL; + } + return emulated; +} + +int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance) { int emulated = EMULATE_DONE; int ra = get_ra(inst); @@ -130,6 +151,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, emulated = kvmppc_e500_emul_tlbivax(vcpu, ea); break; + case XOP_EHPRIV: + emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst, + advance); + break; + default: emulated = EMULATE_FAIL; } @@ -146,7 +172,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, return emulated; } -int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) +int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); int emulated = EMULATE_DONE; @@ -237,7 +263,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) return emulated; } -int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) +int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); int emulated = EMULATE_DONE; diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index 6d6f153b6c1d..ebca6b88ea5e 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c @@ -32,7 +32,7 @@ #include <asm/kvm_ppc.h> #include "e500.h" -#include "trace.h" +#include "trace_booke.h" #include "timing.h" #include "e500_mmu_host.h" @@ -536,7 +536,7 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index, return get_tlb_raddr(gtlbe) | (eaddr & pgmask); } -void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) +void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu) { } diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 1c6a9d729df4..ecf2247b13be 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -32,10 +32,11 @@ #include <asm/kvm_ppc.h> #include "e500.h" -#include "trace.h" #include "timing.h" #include "e500_mmu_host.h" +#include "trace_booke.h" + #define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1) static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM]; @@ -253,6 +254,9 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, ref->pfn = pfn; ref->flags |= E500_TLB_VALID; + /* Mark the page accessed */ + kvm_set_pfn_accessed(pfn); + if (tlbe_is_writable(gtlbe)) kvm_set_pfn_dirty(pfn); } @@ -332,6 +336,13 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, unsigned long hva; int pfnmap = 0; int tsize = BOOK3E_PAGESZ_4K; + int ret = 0; + unsigned long mmu_seq; + struct kvm *kvm = vcpu_e500->vcpu.kvm; + + /* used to check for invalidations in progress */ + mmu_seq = kvm->mmu_notifier_seq; + smp_rmb(); /* * Translate guest physical to true physical, acquiring @@ -449,6 +460,12 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); } + spin_lock(&kvm->mmu_lock); + if (mmu_notifier_retry(kvm, mmu_seq)) { + ret = -EAGAIN; + goto out; + } + kvmppc_e500_ref_setup(ref, gtlbe, pfn); kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize, @@ -457,10 +474,13 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, /* Clear i-cache for new pages */ kvmppc_mmu_flush_icache(pfn); +out: + spin_unlock(&kvm->mmu_lock); + /* Drop refcount on page, so that mmu notifiers can clear it */ kvm_release_pfn_clean(pfn); - return 0; + return ret; } /* XXX only map the one-one case, for now use TLB0 */ diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 19c8379575f7..4132cd2fc171 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -110,7 +110,7 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu_on_cpu); -void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); @@ -147,7 +147,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvmppc_load_guest_fp(vcpu); } -void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu) { vcpu->arch.eplc = mfspr(SPRN_EPLC); vcpu->arch.epsc = mfspr(SPRN_EPSC); @@ -204,7 +204,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) return 0; } -void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +static int kvmppc_core_get_sregs_e500mc(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); @@ -224,10 +225,11 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) sregs->u.e.ivor_high[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL]; sregs->u.e.ivor_high[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT]; - kvmppc_get_sregs_ivor(vcpu, sregs); + return kvmppc_get_sregs_ivor(vcpu, sregs); } -int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +static int kvmppc_core_set_sregs_e500mc(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); int ret; @@ -260,21 +262,22 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) return kvmppc_set_sregs_ivor(vcpu, sregs); } -int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, - union kvmppc_one_reg *val) +static int kvmppc_get_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); return r; } -int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, - union kvmppc_one_reg *val) +static int kvmppc_set_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val); return r; } -struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm, + unsigned int id) { struct kvmppc_vcpu_e500 *vcpu_e500; struct kvm_vcpu *vcpu; @@ -315,7 +318,7 @@ out: return ERR_PTR(err); } -void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +static void kvmppc_core_vcpu_free_e500mc(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); @@ -325,7 +328,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) kmem_cache_free(kvm_vcpu_cache, vcpu_e500); } -int kvmppc_core_init_vm(struct kvm *kvm) +static int kvmppc_core_init_vm_e500mc(struct kvm *kvm) { int lpid; @@ -337,27 +340,52 @@ int kvmppc_core_init_vm(struct kvm *kvm) return 0; } -void kvmppc_core_destroy_vm(struct kvm *kvm) +static void kvmppc_core_destroy_vm_e500mc(struct kvm *kvm) { kvmppc_free_lpid(kvm->arch.lpid); } +static struct kvmppc_ops kvm_ops_e500mc = { + .get_sregs = kvmppc_core_get_sregs_e500mc, + .set_sregs = kvmppc_core_set_sregs_e500mc, + .get_one_reg = kvmppc_get_one_reg_e500mc, + .set_one_reg = kvmppc_set_one_reg_e500mc, + .vcpu_load = kvmppc_core_vcpu_load_e500mc, + .vcpu_put = kvmppc_core_vcpu_put_e500mc, + .vcpu_create = kvmppc_core_vcpu_create_e500mc, + .vcpu_free = kvmppc_core_vcpu_free_e500mc, + .mmu_destroy = kvmppc_mmu_destroy_e500, + .init_vm = kvmppc_core_init_vm_e500mc, + .destroy_vm = kvmppc_core_destroy_vm_e500mc, + .emulate_op = kvmppc_core_emulate_op_e500, + .emulate_mtspr = kvmppc_core_emulate_mtspr_e500, + .emulate_mfspr = kvmppc_core_emulate_mfspr_e500, +}; + static int __init kvmppc_e500mc_init(void) { int r; r = kvmppc_booke_init(); if (r) - return r; + goto err_out; kvmppc_init_lpid(64); kvmppc_claim_lpid(0); /* host */ - return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); + r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); + if (r) + goto err_out; + kvm_ops_e500mc.owner = THIS_MODULE; + kvmppc_pr_ops = &kvm_ops_e500mc; + +err_out: + return r; } static void __exit kvmppc_e500mc_exit(void) { + kvmppc_pr_ops = NULL; kvmppc_booke_exit(); } diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 751cd45f65a0..2f9a0873b44f 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -130,8 +130,8 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_PIR: break; default: - emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, - spr_val); + emulated = vcpu->kvm->arch.kvm_ops->emulate_mtspr(vcpu, sprn, + spr_val); if (emulated == EMULATE_FAIL) printk(KERN_INFO "mtspr: unknown spr " "0x%x\n", sprn); @@ -191,8 +191,8 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) spr_val = kvmppc_get_dec(vcpu, get_tb()); break; default: - emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, - &spr_val); + emulated = vcpu->kvm->arch.kvm_ops->emulate_mfspr(vcpu, sprn, + &spr_val); if (unlikely(emulated == EMULATE_FAIL)) { printk(KERN_INFO "mfspr: unknown spr " "0x%x\n", sprn); @@ -464,7 +464,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) } if (emulated == EMULATE_FAIL) { - emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance); + emulated = vcpu->kvm->arch.kvm_ops->emulate_op(run, vcpu, inst, + &advance); if (emulated == EMULATE_AGAIN) { advance = 0; } else if (emulated == EMULATE_FAIL) { @@ -483,3 +484,4 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) return emulated; } +EXPORT_SYMBOL_GPL(kvmppc_emulate_instruction); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 07c0106fab76..9ae97686e9f4 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -26,6 +26,7 @@ #include <linux/fs.h> #include <linux/slab.h> #include <linux/file.h> +#include <linux/module.h> #include <asm/cputable.h> #include <asm/uaccess.h> #include <asm/kvm_ppc.h> @@ -39,6 +40,12 @@ #define CREATE_TRACE_POINTS #include "trace.h" +struct kvmppc_ops *kvmppc_hv_ops; +EXPORT_SYMBOL_GPL(kvmppc_hv_ops); +struct kvmppc_ops *kvmppc_pr_ops; +EXPORT_SYMBOL_GPL(kvmppc_pr_ops); + + int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { return !!(v->arch.pending_exceptions) || @@ -50,7 +57,6 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return 1; } -#ifndef CONFIG_KVM_BOOK3S_64_HV /* * Common checks before entering the guest world. Call with interrupts * disabled. @@ -125,7 +131,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) return r; } -#endif /* CONFIG_KVM_BOOK3S_64_HV */ +EXPORT_SYMBOL_GPL(kvmppc_prepare_to_enter); int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) { @@ -179,6 +185,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) return r; } +EXPORT_SYMBOL_GPL(kvmppc_kvm_pv); int kvmppc_sanity_check(struct kvm_vcpu *vcpu) { @@ -192,11 +199,9 @@ int kvmppc_sanity_check(struct kvm_vcpu *vcpu) if ((vcpu->arch.cpu_type != KVM_CPU_3S_64) && vcpu->arch.papr_enabled) goto out; -#ifdef CONFIG_KVM_BOOK3S_64_HV /* HV KVM can only do PAPR mode for now */ - if (!vcpu->arch.papr_enabled) + if (!vcpu->arch.papr_enabled && is_kvmppc_hv_enabled(vcpu->kvm)) goto out; -#endif #ifdef CONFIG_KVM_BOOKE_HV if (!cpu_has_feature(CPU_FTR_EMB_HV)) @@ -209,6 +214,7 @@ out: vcpu->arch.sane = r; return r ? 0 : -EINVAL; } +EXPORT_SYMBOL_GPL(kvmppc_sanity_check); int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) { @@ -243,6 +249,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) return r; } +EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio); int kvm_arch_hardware_enable(void *garbage) { @@ -269,10 +276,35 @@ void kvm_arch_check_processor_compat(void *rtn) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { - if (type) - return -EINVAL; - + struct kvmppc_ops *kvm_ops = NULL; + /* + * if we have both HV and PR enabled, default is HV + */ + if (type == 0) { + if (kvmppc_hv_ops) + kvm_ops = kvmppc_hv_ops; + else + kvm_ops = kvmppc_pr_ops; + if (!kvm_ops) + goto err_out; + } else if (type == KVM_VM_PPC_HV) { + if (!kvmppc_hv_ops) + goto err_out; + kvm_ops = kvmppc_hv_ops; + } else if (type == KVM_VM_PPC_PR) { + if (!kvmppc_pr_ops) + goto err_out; + kvm_ops = kvmppc_pr_ops; + } else + goto err_out; + + if (kvm_ops->owner && !try_module_get(kvm_ops->owner)) + return -ENOENT; + + kvm->arch.kvm_ops = kvm_ops; return kvmppc_core_init_vm(kvm); +err_out: + return -EINVAL; } void kvm_arch_destroy_vm(struct kvm *kvm) @@ -292,6 +324,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvmppc_core_destroy_vm(kvm); mutex_unlock(&kvm->lock); + + /* drop the module reference */ + module_put(kvm->arch.kvm_ops->owner); } void kvm_arch_sync_events(struct kvm *kvm) @@ -301,6 +336,10 @@ void kvm_arch_sync_events(struct kvm *kvm) int kvm_dev_ioctl_check_extension(long ext) { int r; + /* FIXME!! + * Should some of this be vm ioctl ? is it possible now ? + */ + int hv_enabled = kvmppc_hv_ops ? 1 : 0; switch (ext) { #ifdef CONFIG_BOOKE @@ -320,22 +359,26 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_DEVICE_CTRL: r = 1; break; -#ifndef CONFIG_KVM_BOOK3S_64_HV case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_OSI: case KVM_CAP_PPC_GET_PVINFO: #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) case KVM_CAP_SW_TLB: #endif -#ifdef CONFIG_KVM_MPIC - case KVM_CAP_IRQ_MPIC: -#endif - r = 1; + /* We support this only for PR */ + r = !hv_enabled; break; +#ifdef CONFIG_KVM_MMIO case KVM_CAP_COALESCED_MMIO: r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; #endif +#ifdef CONFIG_KVM_MPIC + case KVM_CAP_IRQ_MPIC: + r = 1; + break; +#endif + #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_SPAPR_TCE: case KVM_CAP_PPC_ALLOC_HTAB: @@ -346,32 +389,37 @@ int kvm_dev_ioctl_check_extension(long ext) r = 1; break; #endif /* CONFIG_PPC_BOOK3S_64 */ -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE case KVM_CAP_PPC_SMT: - r = threads_per_core; + if (hv_enabled) + r = threads_per_core; + else + r = 0; break; case KVM_CAP_PPC_RMA: - r = 1; + r = hv_enabled; /* PPC970 requires an RMA */ - if (cpu_has_feature(CPU_FTR_ARCH_201)) + if (r && cpu_has_feature(CPU_FTR_ARCH_201)) r = 2; break; #endif case KVM_CAP_SYNC_MMU: -#ifdef CONFIG_KVM_BOOK3S_64_HV - r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0; +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + if (hv_enabled) + r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0; + else + r = 0; #elif defined(KVM_ARCH_WANT_MMU_NOTIFIER) r = 1; #else r = 0; - break; #endif -#ifdef CONFIG_KVM_BOOK3S_64_HV + break; +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE case KVM_CAP_PPC_HTAB_FD: - r = 1; + r = hv_enabled; break; #endif - break; case KVM_CAP_NR_VCPUS: /* * Recommending a number of CPUs is somewhat arbitrary; we @@ -379,11 +427,10 @@ int kvm_dev_ioctl_check_extension(long ext) * will have secondary threads "offline"), and for other KVM * implementations just count online CPUs. */ -#ifdef CONFIG_KVM_BOOK3S_64_HV - r = num_present_cpus(); -#else - r = num_online_cpus(); -#endif + if (hv_enabled) + r = num_present_cpus(); + else + r = num_online_cpus(); break; case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; @@ -407,15 +454,16 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { - kvmppc_core_free_memslot(free, dont); + kvmppc_core_free_memslot(kvm, free, dont); } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { - return kvmppc_core_create_memslot(slot, npages); + return kvmppc_core_create_memslot(kvm, slot, npages); } void kvm_arch_memslots_updated(struct kvm *kvm) @@ -659,6 +707,7 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, return EMULATE_DO_MMIO; } +EXPORT_SYMBOL_GPL(kvmppc_handle_load); /* Same as above, but sign extends */ int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, @@ -720,6 +769,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, return EMULATE_DO_MMIO; } +EXPORT_SYMBOL_GPL(kvmppc_handle_store); int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { @@ -1024,52 +1074,12 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce); goto out; } -#endif /* CONFIG_PPC_BOOK3S_64 */ - -#ifdef CONFIG_KVM_BOOK3S_64_HV - case KVM_ALLOCATE_RMA: { - struct kvm_allocate_rma rma; - struct kvm *kvm = filp->private_data; - - r = kvm_vm_ioctl_allocate_rma(kvm, &rma); - if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma))) - r = -EFAULT; - break; - } - - case KVM_PPC_ALLOCATE_HTAB: { - u32 htab_order; - - r = -EFAULT; - if (get_user(htab_order, (u32 __user *)argp)) - break; - r = kvmppc_alloc_reset_hpt(kvm, &htab_order); - if (r) - break; - r = -EFAULT; - if (put_user(htab_order, (u32 __user *)argp)) - break; - r = 0; - break; - } - - case KVM_PPC_GET_HTAB_FD: { - struct kvm_get_htab_fd ghf; - - r = -EFAULT; - if (copy_from_user(&ghf, argp, sizeof(ghf))) - break; - r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf); - break; - } -#endif /* CONFIG_KVM_BOOK3S_64_HV */ - -#ifdef CONFIG_PPC_BOOK3S_64 case KVM_PPC_GET_SMMU_INFO: { struct kvm_ppc_smmu_info info; + struct kvm *kvm = filp->private_data; memset(&info, 0, sizeof(info)); - r = kvm_vm_ioctl_get_smmu_info(kvm, &info); + r = kvm->arch.kvm_ops->get_smmu_info(kvm, &info); if (r >= 0 && copy_to_user(argp, &info, sizeof(info))) r = -EFAULT; break; @@ -1080,11 +1090,15 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_rtas_define_token(kvm, argp); break; } -#endif /* CONFIG_PPC_BOOK3S_64 */ + default: { + struct kvm *kvm = filp->private_data; + r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg); + } +#else /* CONFIG_PPC_BOOK3S_64 */ default: r = -ENOTTY; +#endif } - out: return r; } @@ -1106,22 +1120,26 @@ long kvmppc_alloc_lpid(void) return lpid; } +EXPORT_SYMBOL_GPL(kvmppc_alloc_lpid); void kvmppc_claim_lpid(long lpid) { set_bit(lpid, lpid_inuse); } +EXPORT_SYMBOL_GPL(kvmppc_claim_lpid); void kvmppc_free_lpid(long lpid) { clear_bit(lpid, lpid_inuse); } +EXPORT_SYMBOL_GPL(kvmppc_free_lpid); void kvmppc_init_lpid(unsigned long nr_lpids_param) { nr_lpids = min_t(unsigned long, KVMPPC_NR_LPIDS, nr_lpids_param); memset(lpid_inuse, 0, sizeof(lpid_inuse)); } +EXPORT_SYMBOL_GPL(kvmppc_init_lpid); int kvm_arch_init(void *opaque) { @@ -1130,4 +1148,5 @@ int kvm_arch_init(void *opaque) void kvm_arch_exit(void) { + } diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index e326489a5420..2e0e67ef3544 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -31,126 +31,6 @@ TRACE_EVENT(kvm_ppc_instr, __entry->inst, __entry->pc, __entry->emulate) ); -#ifdef CONFIG_PPC_BOOK3S -#define kvm_trace_symbol_exit \ - {0x100, "SYSTEM_RESET"}, \ - {0x200, "MACHINE_CHECK"}, \ - {0x300, "DATA_STORAGE"}, \ - {0x380, "DATA_SEGMENT"}, \ - {0x400, "INST_STORAGE"}, \ - {0x480, "INST_SEGMENT"}, \ - {0x500, "EXTERNAL"}, \ - {0x501, "EXTERNAL_LEVEL"}, \ - {0x502, "EXTERNAL_HV"}, \ - {0x600, "ALIGNMENT"}, \ - {0x700, "PROGRAM"}, \ - {0x800, "FP_UNAVAIL"}, \ - {0x900, "DECREMENTER"}, \ - {0x980, "HV_DECREMENTER"}, \ - {0xc00, "SYSCALL"}, \ - {0xd00, "TRACE"}, \ - {0xe00, "H_DATA_STORAGE"}, \ - {0xe20, "H_INST_STORAGE"}, \ - {0xe40, "H_EMUL_ASSIST"}, \ - {0xf00, "PERFMON"}, \ - {0xf20, "ALTIVEC"}, \ - {0xf40, "VSX"} -#else -#define kvm_trace_symbol_exit \ - {0, "CRITICAL"}, \ - {1, "MACHINE_CHECK"}, \ - {2, "DATA_STORAGE"}, \ - {3, "INST_STORAGE"}, \ - {4, "EXTERNAL"}, \ - {5, "ALIGNMENT"}, \ - {6, "PROGRAM"}, \ - {7, "FP_UNAVAIL"}, \ - {8, "SYSCALL"}, \ - {9, "AP_UNAVAIL"}, \ - {10, "DECREMENTER"}, \ - {11, "FIT"}, \ - {12, "WATCHDOG"}, \ - {13, "DTLB_MISS"}, \ - {14, "ITLB_MISS"}, \ - {15, "DEBUG"}, \ - {32, "SPE_UNAVAIL"}, \ - {33, "SPE_FP_DATA"}, \ - {34, "SPE_FP_ROUND"}, \ - {35, "PERFORMANCE_MONITOR"}, \ - {36, "DOORBELL"}, \ - {37, "DOORBELL_CRITICAL"}, \ - {38, "GUEST_DBELL"}, \ - {39, "GUEST_DBELL_CRIT"}, \ - {40, "HV_SYSCALL"}, \ - {41, "HV_PRIV"} -#endif - -TRACE_EVENT(kvm_exit, - TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), - TP_ARGS(exit_nr, vcpu), - - TP_STRUCT__entry( - __field( unsigned int, exit_nr ) - __field( unsigned long, pc ) - __field( unsigned long, msr ) - __field( unsigned long, dar ) -#ifdef CONFIG_KVM_BOOK3S_PR - __field( unsigned long, srr1 ) -#endif - __field( unsigned long, last_inst ) - ), - - TP_fast_assign( -#ifdef CONFIG_KVM_BOOK3S_PR - struct kvmppc_book3s_shadow_vcpu *svcpu; -#endif - __entry->exit_nr = exit_nr; - __entry->pc = kvmppc_get_pc(vcpu); - __entry->dar = kvmppc_get_fault_dar(vcpu); - __entry->msr = vcpu->arch.shared->msr; -#ifdef CONFIG_KVM_BOOK3S_PR - svcpu = svcpu_get(vcpu); - __entry->srr1 = svcpu->shadow_srr1; - svcpu_put(svcpu); -#endif - __entry->last_inst = vcpu->arch.last_inst; - ), - - TP_printk("exit=%s" - " | pc=0x%lx" - " | msr=0x%lx" - " | dar=0x%lx" -#ifdef CONFIG_KVM_BOOK3S_PR - " | srr1=0x%lx" -#endif - " | last_inst=0x%lx" - , - __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit), - __entry->pc, - __entry->msr, - __entry->dar, -#ifdef CONFIG_KVM_BOOK3S_PR - __entry->srr1, -#endif - __entry->last_inst - ) -); - -TRACE_EVENT(kvm_unmap_hva, - TP_PROTO(unsigned long hva), - TP_ARGS(hva), - - TP_STRUCT__entry( - __field( unsigned long, hva ) - ), - - TP_fast_assign( - __entry->hva = hva; - ), - - TP_printk("unmap hva 0x%lx\n", __entry->hva) -); - TRACE_EVENT(kvm_stlb_inval, TP_PROTO(unsigned int stlb_index), TP_ARGS(stlb_index), @@ -236,315 +116,6 @@ TRACE_EVENT(kvm_check_requests, __entry->cpu_nr, __entry->requests) ); - -/************************************************************************* - * Book3S trace points * - *************************************************************************/ - -#ifdef CONFIG_KVM_BOOK3S_PR - -TRACE_EVENT(kvm_book3s_reenter, - TP_PROTO(int r, struct kvm_vcpu *vcpu), - TP_ARGS(r, vcpu), - - TP_STRUCT__entry( - __field( unsigned int, r ) - __field( unsigned long, pc ) - ), - - TP_fast_assign( - __entry->r = r; - __entry->pc = kvmppc_get_pc(vcpu); - ), - - TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc) -); - -#ifdef CONFIG_PPC_BOOK3S_64 - -TRACE_EVENT(kvm_book3s_64_mmu_map, - TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr, - struct kvmppc_pte *orig_pte), - TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte), - - TP_STRUCT__entry( - __field( unsigned char, flag_w ) - __field( unsigned char, flag_x ) - __field( unsigned long, eaddr ) - __field( unsigned long, hpteg ) - __field( unsigned long, va ) - __field( unsigned long long, vpage ) - __field( unsigned long, hpaddr ) - ), - - TP_fast_assign( - __entry->flag_w = ((rflags & HPTE_R_PP) == 3) ? '-' : 'w'; - __entry->flag_x = (rflags & HPTE_R_N) ? '-' : 'x'; - __entry->eaddr = orig_pte->eaddr; - __entry->hpteg = hpteg; - __entry->va = va; - __entry->vpage = orig_pte->vpage; - __entry->hpaddr = hpaddr; - ), - - TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx", - __entry->flag_w, __entry->flag_x, __entry->eaddr, - __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr) -); - -#endif /* CONFIG_PPC_BOOK3S_64 */ - -TRACE_EVENT(kvm_book3s_mmu_map, - TP_PROTO(struct hpte_cache *pte), - TP_ARGS(pte), - - TP_STRUCT__entry( - __field( u64, host_vpn ) - __field( u64, pfn ) - __field( ulong, eaddr ) - __field( u64, vpage ) - __field( ulong, raddr ) - __field( int, flags ) - ), - - TP_fast_assign( - __entry->host_vpn = pte->host_vpn; - __entry->pfn = pte->pfn; - __entry->eaddr = pte->pte.eaddr; - __entry->vpage = pte->pte.vpage; - __entry->raddr = pte->pte.raddr; - __entry->flags = (pte->pte.may_read ? 0x4 : 0) | - (pte->pte.may_write ? 0x2 : 0) | - (pte->pte.may_execute ? 0x1 : 0); - ), - - TP_printk("Map: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", - __entry->host_vpn, __entry->pfn, __entry->eaddr, - __entry->vpage, __entry->raddr, __entry->flags) -); - -TRACE_EVENT(kvm_book3s_mmu_invalidate, - TP_PROTO(struct hpte_cache *pte), - TP_ARGS(pte), - - TP_STRUCT__entry( - __field( u64, host_vpn ) - __field( u64, pfn ) - __field( ulong, eaddr ) - __field( u64, vpage ) - __field( ulong, raddr ) - __field( int, flags ) - ), - - TP_fast_assign( - __entry->host_vpn = pte->host_vpn; - __entry->pfn = pte->pfn; - __entry->eaddr = pte->pte.eaddr; - __entry->vpage = pte->pte.vpage; - __entry->raddr = pte->pte.raddr; - __entry->flags = (pte->pte.may_read ? 0x4 : 0) | - (pte->pte.may_write ? 0x2 : 0) | - (pte->pte.may_execute ? 0x1 : 0); - ), - - TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", - __entry->host_vpn, __entry->pfn, __entry->eaddr, - __entry->vpage, __entry->raddr, __entry->flags) -); - -TRACE_EVENT(kvm_book3s_mmu_flush, - TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1, - unsigned long long p2), - TP_ARGS(type, vcpu, p1, p2), - - TP_STRUCT__entry( - __field( int, count ) - __field( unsigned long long, p1 ) - __field( unsigned long long, p2 ) - __field( const char *, type ) - ), - - TP_fast_assign( - __entry->count = to_book3s(vcpu)->hpte_cache_count; - __entry->p1 = p1; - __entry->p2 = p2; - __entry->type = type; - ), - - TP_printk("Flush %d %sPTEs: %llx - %llx", - __entry->count, __entry->type, __entry->p1, __entry->p2) -); - -TRACE_EVENT(kvm_book3s_slb_found, - TP_PROTO(unsigned long long gvsid, unsigned long long hvsid), - TP_ARGS(gvsid, hvsid), - - TP_STRUCT__entry( - __field( unsigned long long, gvsid ) - __field( unsigned long long, hvsid ) - ), - - TP_fast_assign( - __entry->gvsid = gvsid; - __entry->hvsid = hvsid; - ), - - TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid) -); - -TRACE_EVENT(kvm_book3s_slb_fail, - TP_PROTO(u16 sid_map_mask, unsigned long long gvsid), - TP_ARGS(sid_map_mask, gvsid), - - TP_STRUCT__entry( - __field( unsigned short, sid_map_mask ) - __field( unsigned long long, gvsid ) - ), - - TP_fast_assign( - __entry->sid_map_mask = sid_map_mask; - __entry->gvsid = gvsid; - ), - - TP_printk("%x/%x: %llx", __entry->sid_map_mask, - SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid) -); - -TRACE_EVENT(kvm_book3s_slb_map, - TP_PROTO(u16 sid_map_mask, unsigned long long gvsid, - unsigned long long hvsid), - TP_ARGS(sid_map_mask, gvsid, hvsid), - - TP_STRUCT__entry( - __field( unsigned short, sid_map_mask ) - __field( unsigned long long, guest_vsid ) - __field( unsigned long long, host_vsid ) - ), - - TP_fast_assign( - __entry->sid_map_mask = sid_map_mask; - __entry->guest_vsid = gvsid; - __entry->host_vsid = hvsid; - ), - - TP_printk("%x: %llx -> %llx", __entry->sid_map_mask, - __entry->guest_vsid, __entry->host_vsid) -); - -TRACE_EVENT(kvm_book3s_slbmte, - TP_PROTO(u64 slb_vsid, u64 slb_esid), - TP_ARGS(slb_vsid, slb_esid), - - TP_STRUCT__entry( - __field( u64, slb_vsid ) - __field( u64, slb_esid ) - ), - - TP_fast_assign( - __entry->slb_vsid = slb_vsid; - __entry->slb_esid = slb_esid; - ), - - TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid) -); - -#endif /* CONFIG_PPC_BOOK3S */ - - -/************************************************************************* - * Book3E trace points * - *************************************************************************/ - -#ifdef CONFIG_BOOKE - -TRACE_EVENT(kvm_booke206_stlb_write, - TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3), - TP_ARGS(mas0, mas8, mas1, mas2, mas7_3), - - TP_STRUCT__entry( - __field( __u32, mas0 ) - __field( __u32, mas8 ) - __field( __u32, mas1 ) - __field( __u64, mas2 ) - __field( __u64, mas7_3 ) - ), - - TP_fast_assign( - __entry->mas0 = mas0; - __entry->mas8 = mas8; - __entry->mas1 = mas1; - __entry->mas2 = mas2; - __entry->mas7_3 = mas7_3; - ), - - TP_printk("mas0=%x mas8=%x mas1=%x mas2=%llx mas7_3=%llx", - __entry->mas0, __entry->mas8, __entry->mas1, - __entry->mas2, __entry->mas7_3) -); - -TRACE_EVENT(kvm_booke206_gtlb_write, - TP_PROTO(__u32 mas0, __u32 mas1, __u64 mas2, __u64 mas7_3), - TP_ARGS(mas0, mas1, mas2, mas7_3), - - TP_STRUCT__entry( - __field( __u32, mas0 ) - __field( __u32, mas1 ) - __field( __u64, mas2 ) - __field( __u64, mas7_3 ) - ), - - TP_fast_assign( - __entry->mas0 = mas0; - __entry->mas1 = mas1; - __entry->mas2 = mas2; - __entry->mas7_3 = mas7_3; - ), - - TP_printk("mas0=%x mas1=%x mas2=%llx mas7_3=%llx", - __entry->mas0, __entry->mas1, - __entry->mas2, __entry->mas7_3) -); - -TRACE_EVENT(kvm_booke206_ref_release, - TP_PROTO(__u64 pfn, __u32 flags), - TP_ARGS(pfn, flags), - - TP_STRUCT__entry( - __field( __u64, pfn ) - __field( __u32, flags ) - ), - - TP_fast_assign( - __entry->pfn = pfn; - __entry->flags = flags; - ), - - TP_printk("pfn=%llx flags=%x", - __entry->pfn, __entry->flags) -); - -TRACE_EVENT(kvm_booke_queue_irqprio, - TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority), - TP_ARGS(vcpu, priority), - - TP_STRUCT__entry( - __field( __u32, cpu_nr ) - __field( __u32, priority ) - __field( unsigned long, pending ) - ), - - TP_fast_assign( - __entry->cpu_nr = vcpu->vcpu_id; - __entry->priority = priority; - __entry->pending = vcpu->arch.pending_exceptions; - ), - - TP_printk("vcpu=%x prio=%x pending=%lx", - __entry->cpu_nr, __entry->priority, __entry->pending) -); - -#endif - #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h new file mode 100644 index 000000000000..f7537cf26ce7 --- /dev/null +++ b/arch/powerpc/kvm/trace_booke.h @@ -0,0 +1,177 @@ +#if !defined(_TRACE_KVM_BOOKE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_BOOKE_H + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm_booke +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_booke + +#define kvm_trace_symbol_exit \ + {0, "CRITICAL"}, \ + {1, "MACHINE_CHECK"}, \ + {2, "DATA_STORAGE"}, \ + {3, "INST_STORAGE"}, \ + {4, "EXTERNAL"}, \ + {5, "ALIGNMENT"}, \ + {6, "PROGRAM"}, \ + {7, "FP_UNAVAIL"}, \ + {8, "SYSCALL"}, \ + {9, "AP_UNAVAIL"}, \ + {10, "DECREMENTER"}, \ + {11, "FIT"}, \ + {12, "WATCHDOG"}, \ + {13, "DTLB_MISS"}, \ + {14, "ITLB_MISS"}, \ + {15, "DEBUG"}, \ + {32, "SPE_UNAVAIL"}, \ + {33, "SPE_FP_DATA"}, \ + {34, "SPE_FP_ROUND"}, \ + {35, "PERFORMANCE_MONITOR"}, \ + {36, "DOORBELL"}, \ + {37, "DOORBELL_CRITICAL"}, \ + {38, "GUEST_DBELL"}, \ + {39, "GUEST_DBELL_CRIT"}, \ + {40, "HV_SYSCALL"}, \ + {41, "HV_PRIV"} + +TRACE_EVENT(kvm_exit, + TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), + TP_ARGS(exit_nr, vcpu), + + TP_STRUCT__entry( + __field( unsigned int, exit_nr ) + __field( unsigned long, pc ) + __field( unsigned long, msr ) + __field( unsigned long, dar ) + __field( unsigned long, last_inst ) + ), + + TP_fast_assign( + __entry->exit_nr = exit_nr; + __entry->pc = kvmppc_get_pc(vcpu); + __entry->dar = kvmppc_get_fault_dar(vcpu); + __entry->msr = vcpu->arch.shared->msr; + __entry->last_inst = vcpu->arch.last_inst; + ), + + TP_printk("exit=%s" + " | pc=0x%lx" + " | msr=0x%lx" + " | dar=0x%lx" + " | last_inst=0x%lx" + , + __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit), + __entry->pc, + __entry->msr, + __entry->dar, + __entry->last_inst + ) +); + +TRACE_EVENT(kvm_unmap_hva, + TP_PROTO(unsigned long hva), + TP_ARGS(hva), + + TP_STRUCT__entry( + __field( unsigned long, hva ) + ), + + TP_fast_assign( + __entry->hva = hva; + ), + + TP_printk("unmap hva 0x%lx\n", __entry->hva) +); + +TRACE_EVENT(kvm_booke206_stlb_write, + TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3), + TP_ARGS(mas0, mas8, mas1, mas2, mas7_3), + + TP_STRUCT__entry( + __field( __u32, mas0 ) + __field( __u32, mas8 ) + __field( __u32, mas1 ) + __field( __u64, mas2 ) + __field( __u64, mas7_3 ) + ), + + TP_fast_assign( + __entry->mas0 = mas0; + __entry->mas8 = mas8; + __entry->mas1 = mas1; + __entry->mas2 = mas2; + __entry->mas7_3 = mas7_3; + ), + + TP_printk("mas0=%x mas8=%x mas1=%x mas2=%llx mas7_3=%llx", + __entry->mas0, __entry->mas8, __entry->mas1, + __entry->mas2, __entry->mas7_3) +); + +TRACE_EVENT(kvm_booke206_gtlb_write, + TP_PROTO(__u32 mas0, __u32 mas1, __u64 mas2, __u64 mas7_3), + TP_ARGS(mas0, mas1, mas2, mas7_3), + + TP_STRUCT__entry( + __field( __u32, mas0 ) + __field( __u32, mas1 ) + __field( __u64, mas2 ) + __field( __u64, mas7_3 ) + ), + + TP_fast_assign( + __entry->mas0 = mas0; + __entry->mas1 = mas1; + __entry->mas2 = mas2; + __entry->mas7_3 = mas7_3; + ), + + TP_printk("mas0=%x mas1=%x mas2=%llx mas7_3=%llx", + __entry->mas0, __entry->mas1, + __entry->mas2, __entry->mas7_3) +); + +TRACE_EVENT(kvm_booke206_ref_release, + TP_PROTO(__u64 pfn, __u32 flags), + TP_ARGS(pfn, flags), + + TP_STRUCT__entry( + __field( __u64, pfn ) + __field( __u32, flags ) + ), + + TP_fast_assign( + __entry->pfn = pfn; + __entry->flags = flags; + ), + + TP_printk("pfn=%llx flags=%x", + __entry->pfn, __entry->flags) +); + +TRACE_EVENT(kvm_booke_queue_irqprio, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority), + TP_ARGS(vcpu, priority), + + TP_STRUCT__entry( + __field( __u32, cpu_nr ) + __field( __u32, priority ) + __field( unsigned long, pending ) + ), + + TP_fast_assign( + __entry->cpu_nr = vcpu->vcpu_id; + __entry->priority = priority; + __entry->pending = vcpu->arch.pending_exceptions; + ), + + TP_printk("vcpu=%x prio=%x pending=%lx", + __entry->cpu_nr, __entry->priority, __entry->pending) +); + +#endif + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h new file mode 100644 index 000000000000..8b22e4748344 --- /dev/null +++ b/arch/powerpc/kvm/trace_pr.h @@ -0,0 +1,297 @@ + +#if !defined(_TRACE_KVM_PR_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_PR_H + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm_pr +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_pr + +#define kvm_trace_symbol_exit \ + {0x100, "SYSTEM_RESET"}, \ + {0x200, "MACHINE_CHECK"}, \ + {0x300, "DATA_STORAGE"}, \ + {0x380, "DATA_SEGMENT"}, \ + {0x400, "INST_STORAGE"}, \ + {0x480, "INST_SEGMENT"}, \ + {0x500, "EXTERNAL"}, \ + {0x501, "EXTERNAL_LEVEL"}, \ + {0x502, "EXTERNAL_HV"}, \ + {0x600, "ALIGNMENT"}, \ + {0x700, "PROGRAM"}, \ + {0x800, "FP_UNAVAIL"}, \ + {0x900, "DECREMENTER"}, \ + {0x980, "HV_DECREMENTER"}, \ + {0xc00, "SYSCALL"}, \ + {0xd00, "TRACE"}, \ + {0xe00, "H_DATA_STORAGE"}, \ + {0xe20, "H_INST_STORAGE"}, \ + {0xe40, "H_EMUL_ASSIST"}, \ + {0xf00, "PERFMON"}, \ + {0xf20, "ALTIVEC"}, \ + {0xf40, "VSX"} + +TRACE_EVENT(kvm_book3s_reenter, + TP_PROTO(int r, struct kvm_vcpu *vcpu), + TP_ARGS(r, vcpu), + + TP_STRUCT__entry( + __field( unsigned int, r ) + __field( unsigned long, pc ) + ), + + TP_fast_assign( + __entry->r = r; + __entry->pc = kvmppc_get_pc(vcpu); + ), + + TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc) +); + +#ifdef CONFIG_PPC_BOOK3S_64 + +TRACE_EVENT(kvm_book3s_64_mmu_map, + TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr, + struct kvmppc_pte *orig_pte), + TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte), + + TP_STRUCT__entry( + __field( unsigned char, flag_w ) + __field( unsigned char, flag_x ) + __field( unsigned long, eaddr ) + __field( unsigned long, hpteg ) + __field( unsigned long, va ) + __field( unsigned long long, vpage ) + __field( unsigned long, hpaddr ) + ), + + TP_fast_assign( + __entry->flag_w = ((rflags & HPTE_R_PP) == 3) ? '-' : 'w'; + __entry->flag_x = (rflags & HPTE_R_N) ? '-' : 'x'; + __entry->eaddr = orig_pte->eaddr; + __entry->hpteg = hpteg; + __entry->va = va; + __entry->vpage = orig_pte->vpage; + __entry->hpaddr = hpaddr; + ), + + TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx", + __entry->flag_w, __entry->flag_x, __entry->eaddr, + __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr) +); + +#endif /* CONFIG_PPC_BOOK3S_64 */ + +TRACE_EVENT(kvm_book3s_mmu_map, + TP_PROTO(struct hpte_cache *pte), + TP_ARGS(pte), + + TP_STRUCT__entry( + __field( u64, host_vpn ) + __field( u64, pfn ) + __field( ulong, eaddr ) + __field( u64, vpage ) + __field( ulong, raddr ) + __field( int, flags ) + ), + + TP_fast_assign( + __entry->host_vpn = pte->host_vpn; + __entry->pfn = pte->pfn; + __entry->eaddr = pte->pte.eaddr; + __entry->vpage = pte->pte.vpage; + __entry->raddr = pte->pte.raddr; + __entry->flags = (pte->pte.may_read ? 0x4 : 0) | + (pte->pte.may_write ? 0x2 : 0) | + (pte->pte.may_execute ? 0x1 : 0); + ), + + TP_printk("Map: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", + __entry->host_vpn, __entry->pfn, __entry->eaddr, + __entry->vpage, __entry->raddr, __entry->flags) +); + +TRACE_EVENT(kvm_book3s_mmu_invalidate, + TP_PROTO(struct hpte_cache *pte), + TP_ARGS(pte), + + TP_STRUCT__entry( + __field( u64, host_vpn ) + __field( u64, pfn ) + __field( ulong, eaddr ) + __field( u64, vpage ) + __field( ulong, raddr ) + __field( int, flags ) + ), + + TP_fast_assign( + __entry->host_vpn = pte->host_vpn; + __entry->pfn = pte->pfn; + __entry->eaddr = pte->pte.eaddr; + __entry->vpage = pte->pte.vpage; + __entry->raddr = pte->pte.raddr; + __entry->flags = (pte->pte.may_read ? 0x4 : 0) | + (pte->pte.may_write ? 0x2 : 0) | + (pte->pte.may_execute ? 0x1 : 0); + ), + + TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", + __entry->host_vpn, __entry->pfn, __entry->eaddr, + __entry->vpage, __entry->raddr, __entry->flags) +); + +TRACE_EVENT(kvm_book3s_mmu_flush, + TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1, + unsigned long long p2), + TP_ARGS(type, vcpu, p1, p2), + + TP_STRUCT__entry( + __field( int, count ) + __field( unsigned long long, p1 ) + __field( unsigned long long, p2 ) + __field( const char *, type ) + ), + + TP_fast_assign( + __entry->count = to_book3s(vcpu)->hpte_cache_count; + __entry->p1 = p1; + __entry->p2 = p2; + __entry->type = type; + ), + + TP_printk("Flush %d %sPTEs: %llx - %llx", + __entry->count, __entry->type, __entry->p1, __entry->p2) +); + +TRACE_EVENT(kvm_book3s_slb_found, + TP_PROTO(unsigned long long gvsid, unsigned long long hvsid), + TP_ARGS(gvsid, hvsid), + + TP_STRUCT__entry( + __field( unsigned long long, gvsid ) + __field( unsigned long long, hvsid ) + ), + + TP_fast_assign( + __entry->gvsid = gvsid; + __entry->hvsid = hvsid; + ), + + TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid) +); + +TRACE_EVENT(kvm_book3s_slb_fail, + TP_PROTO(u16 sid_map_mask, unsigned long long gvsid), + TP_ARGS(sid_map_mask, gvsid), + + TP_STRUCT__entry( + __field( unsigned short, sid_map_mask ) + __field( unsigned long long, gvsid ) + ), + + TP_fast_assign( + __entry->sid_map_mask = sid_map_mask; + __entry->gvsid = gvsid; + ), + + TP_printk("%x/%x: %llx", __entry->sid_map_mask, + SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid) +); + +TRACE_EVENT(kvm_book3s_slb_map, + TP_PROTO(u16 sid_map_mask, unsigned long long gvsid, + unsigned long long hvsid), + TP_ARGS(sid_map_mask, gvsid, hvsid), + + TP_STRUCT__entry( + __field( unsigned short, sid_map_mask ) + __field( unsigned long long, guest_vsid ) + __field( unsigned long long, host_vsid ) + ), + + TP_fast_assign( + __entry->sid_map_mask = sid_map_mask; + __entry->guest_vsid = gvsid; + __entry->host_vsid = hvsid; + ), + + TP_printk("%x: %llx -> %llx", __entry->sid_map_mask, + __entry->guest_vsid, __entry->host_vsid) +); + +TRACE_EVENT(kvm_book3s_slbmte, + TP_PROTO(u64 slb_vsid, u64 slb_esid), + TP_ARGS(slb_vsid, slb_esid), + + TP_STRUCT__entry( + __field( u64, slb_vsid ) + __field( u64, slb_esid ) + ), + + TP_fast_assign( + __entry->slb_vsid = slb_vsid; + __entry->slb_esid = slb_esid; + ), + + TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid) +); + +TRACE_EVENT(kvm_exit, + TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), + TP_ARGS(exit_nr, vcpu), + + TP_STRUCT__entry( + __field( unsigned int, exit_nr ) + __field( unsigned long, pc ) + __field( unsigned long, msr ) + __field( unsigned long, dar ) + __field( unsigned long, srr1 ) + __field( unsigned long, last_inst ) + ), + + TP_fast_assign( + __entry->exit_nr = exit_nr; + __entry->pc = kvmppc_get_pc(vcpu); + __entry->dar = kvmppc_get_fault_dar(vcpu); + __entry->msr = vcpu->arch.shared->msr; + __entry->srr1 = vcpu->arch.shadow_srr1; + __entry->last_inst = vcpu->arch.last_inst; + ), + + TP_printk("exit=%s" + " | pc=0x%lx" + " | msr=0x%lx" + " | dar=0x%lx" + " | srr1=0x%lx" + " | last_inst=0x%lx" + , + __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit), + __entry->pc, + __entry->msr, + __entry->dar, + __entry->srr1, + __entry->last_inst + ) +); + +TRACE_EVENT(kvm_unmap_hva, + TP_PROTO(unsigned long hva), + TP_ARGS(hva), + + TP_STRUCT__entry( + __field( unsigned long, hva ) + ), + + TP_fast_assign( + __entry->hva = hva; + ), + + TP_printk("unmap hva 0x%lx\n", __entry->hva) +); + +#endif /* _TRACE_KVM_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index 167f72555d60..57a072065057 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -226,19 +226,35 @@ _GLOBAL(csum_partial) blr - .macro source + .macro srcnr 100: .section __ex_table,"a" .align 3 - .llong 100b,.Lsrc_error + .llong 100b,.Lsrc_error_nr .previous .endm - .macro dest + .macro source +150: + .section __ex_table,"a" + .align 3 + .llong 150b,.Lsrc_error + .previous + .endm + + .macro dstnr 200: .section __ex_table,"a" .align 3 - .llong 200b,.Ldest_error + .llong 200b,.Ldest_error_nr + .previous + .endm + + .macro dest +250: + .section __ex_table,"a" + .align 3 + .llong 250b,.Ldest_error .previous .endm @@ -269,16 +285,16 @@ _GLOBAL(csum_partial_copy_generic) rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */ beq .Lcopy_aligned - li r7,4 - sub r6,r7,r6 + li r9,4 + sub r6,r9,r6 mtctr r6 1: -source; lhz r6,0(r3) /* align to doubleword */ +srcnr; lhz r6,0(r3) /* align to doubleword */ subi r5,r5,2 addi r3,r3,2 adde r0,r0,r6 -dest; sth r6,0(r4) +dstnr; sth r6,0(r4) addi r4,r4,2 bdnz 1b @@ -392,10 +408,10 @@ dest; std r16,56(r4) mtctr r6 3: -source; ld r6,0(r3) +srcnr; ld r6,0(r3) addi r3,r3,8 adde r0,r0,r6 -dest; std r6,0(r4) +dstnr; std r6,0(r4) addi r4,r4,8 bdnz 3b @@ -405,10 +421,10 @@ dest; std r6,0(r4) srdi. r6,r5,2 beq .Lcopy_tail_halfword -source; lwz r6,0(r3) +srcnr; lwz r6,0(r3) addi r3,r3,4 adde r0,r0,r6 -dest; stw r6,0(r4) +dstnr; stw r6,0(r4) addi r4,r4,4 subi r5,r5,4 @@ -416,10 +432,10 @@ dest; stw r6,0(r4) srdi. r6,r5,1 beq .Lcopy_tail_byte -source; lhz r6,0(r3) +srcnr; lhz r6,0(r3) addi r3,r3,2 adde r0,r0,r6 -dest; sth r6,0(r4) +dstnr; sth r6,0(r4) addi r4,r4,2 subi r5,r5,2 @@ -427,10 +443,10 @@ dest; sth r6,0(r4) andi. r6,r5,1 beq .Lcopy_finish -source; lbz r6,0(r3) +srcnr; lbz r6,0(r3) sldi r9,r6,8 /* Pad the byte out to 16 bits */ adde r0,r0,r9 -dest; stb r6,0(r4) +dstnr; stb r6,0(r4) .Lcopy_finish: addze r0,r0 /* add in final carry */ @@ -440,6 +456,11 @@ dest; stb r6,0(r4) blr .Lsrc_error: + ld r14,STK_REG(R14)(r1) + ld r15,STK_REG(R15)(r1) + ld r16,STK_REG(R16)(r1) + addi r1,r1,STACKFRAMESIZE +.Lsrc_error_nr: cmpdi 0,r7,0 beqlr li r6,-EFAULT @@ -447,6 +468,11 @@ dest; stb r6,0(r4) blr .Ldest_error: + ld r14,STK_REG(R14)(r1) + ld r15,STK_REG(R15)(r1) + ld r16,STK_REG(R16)(r1) + addi r1,r1,STACKFRAMESIZE +.Ldest_error_nr: cmpdi 0,r8,0 beqlr li r6,-EFAULT diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index a7ee978fb860..b1faa1593c90 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1505,6 +1505,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) */ if ((ra == 1) && !(regs->msr & MSR_PR) \ && (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) { +#ifdef CONFIG_PPC32 /* * Check if we will touch kernel sack overflow */ @@ -1513,7 +1514,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) err = -EINVAL; break; } - +#endif /* CONFIG_PPC32 */ /* * Check if we already set since that means we'll * lose the previous value. diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index d0cd9e4c6837..8ed035d2edb5 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -300,5 +300,9 @@ void vmemmap_free(unsigned long start, unsigned long end) { } +void register_page_bootmem_memmap(unsigned long section_nr, + struct page *start_page, unsigned long size) +{ +} #endif /* CONFIG_SPARSEMEM_VMEMMAP */ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 1cf9c5b67f24..3fa93dc7fe75 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -297,12 +297,21 @@ void __init paging_init(void) } #endif /* ! CONFIG_NEED_MULTIPLE_NODES */ +static void __init register_page_bootmem_info(void) +{ + int i; + + for_each_online_node(i) + register_page_bootmem_info_node(NODE_DATA(i)); +} + void __init mem_init(void) { #ifdef CONFIG_SWIOTLB swiotlb_init(0); #endif + register_page_bootmem_info(); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); set_max_mapnr(max_pfn); free_all_bootmem(); diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 2ee4a707f0df..a3f7abd2f13f 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -199,6 +199,7 @@ #define MMCR1_UNIT_SHIFT(pmc) (60 - (4 * ((pmc) - 1))) #define MMCR1_COMBINE_SHIFT(pmc) (35 - ((pmc) - 1)) #define MMCR1_PMCSEL_SHIFT(pmc) (24 - (((pmc) - 1)) * 8) +#define MMCR1_FAB_SHIFT 36 #define MMCR1_DC_QUAL_SHIFT 47 #define MMCR1_IC_QUAL_SHIFT 46 @@ -388,8 +389,8 @@ static int power8_compute_mmcr(u64 event[], int n_ev, * the threshold bits are used for the match value. */ if (event_is_fab_match(event[i])) { - mmcr1 |= (event[i] >> EVENT_THR_CTL_SHIFT) & - EVENT_THR_CTL_MASK; + mmcr1 |= ((event[i] >> EVENT_THR_CTL_SHIFT) & + EVENT_THR_CTL_MASK) << MMCR1_FAB_SHIFT; } else { val = (event[i] >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK; mmcra |= val << MMCRA_THR_CTL_SHIFT; diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 1c1771a40250..24f58cb0a543 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -233,18 +233,24 @@ static void __init smp_init_pseries(void) alloc_bootmem_cpumask_var(&of_spin_mask); - /* Mark threads which are still spinning in hold loops. */ - if (cpu_has_feature(CPU_FTR_SMT)) { - for_each_present_cpu(i) { - if (cpu_thread_in_core(i) == 0) - cpumask_set_cpu(i, of_spin_mask); - } - } else { - cpumask_copy(of_spin_mask, cpu_present_mask); + /* + * Mark threads which are still spinning in hold loops + * + * We know prom_init will not have started them if RTAS supports + * query-cpu-stopped-state. + */ + if (rtas_token("query-cpu-stopped-state") == RTAS_UNKNOWN_SERVICE) { + if (cpu_has_feature(CPU_FTR_SMT)) { + for_each_present_cpu(i) { + if (cpu_thread_in_core(i) == 0) + cpumask_set_cpu(i, of_spin_mask); + } + } else + cpumask_copy(of_spin_mask, cpu_present_mask); + + cpumask_clear_cpu(boot_cpuid, of_spin_mask); } - cpumask_clear_cpu(boot_cpuid, of_spin_mask); - /* Non-lpar has additional take/give timebase */ if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) { smp_ops->give_timebase = rtas_give_timebase; diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index dcc6ac2d8026..7143793859fa 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -93,6 +93,7 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE select ARCH_SAVE_PAGE_KEYS if HIBERNATION + select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS2 @@ -102,7 +103,6 @@ config S390 select GENERIC_TIME_VSYSCALL_OLD select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 - select HAVE_ARCH_MUTEX_CPU_RELAX select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 6c32190dc73e..346b1c85ffb4 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -15,7 +15,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("0: brcl 0,0\n" + asm_volatile_goto("0: brcl 0,0\n" ".pushsection __jump_table, \"aw\"\n" ASM_ALIGN "\n" ASM_PTR " 0b, %l[label], %0\n" diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index e87ecaa2c569..d5bc3750616e 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -38,13 +38,6 @@ struct sca_block { struct sca_entry cpu[64]; } __attribute__((packed)); -#define KVM_NR_PAGE_SIZES 2 -#define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 8) -#define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) -#define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) -#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) -#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) - #define CPUSTAT_STOPPED 0x80000000 #define CPUSTAT_WAIT 0x10000000 #define CPUSTAT_ECALL_PEND 0x08000000 @@ -220,7 +213,6 @@ struct kvm_s390_interrupt_info { /* for local_interrupt.action_flags */ #define ACTION_STORE_ON_STOP (1<<0) #define ACTION_STOP_ON_STOP (1<<1) -#define ACTION_RELOADVCPU_ON_STOP (1<<2) struct kvm_s390_local_interrupt { spinlock_t lock; diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h index 688271f5f2e4..458c1f7fbc18 100644 --- a/arch/s390/include/asm/mutex.h +++ b/arch/s390/include/asm/mutex.h @@ -7,5 +7,3 @@ */ #include <asm-generic/mutex-dec.h> - -#define arch_mutex_cpu_relax() barrier() diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 0eb37505cab1..ca7821f07260 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -198,6 +198,8 @@ static inline void cpu_relax(void) barrier(); } +#define arch_mutex_cpu_relax() barrier() + static inline void psw_set_key(unsigned int key) { asm volatile("spka 0(%0)" : : "d" (key)); diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 701fe8c59e1f..83e5d216105e 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -44,6 +44,11 @@ extern void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags); extern int arch_spin_trylock_retry(arch_spinlock_t *); extern void arch_spin_relax(arch_spinlock_t *lock); +static inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return lock.owner_cpu == 0; +} + static inline void arch_spin_lock(arch_spinlock_t *lp) { int old; diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index c84f33d51f7b..7dd21720e5b0 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -40,28 +40,26 @@ static inline void *load_real_addr(void *addr) } /* - * Copy up to one page to vmalloc or real memory + * Copy real to virtual or real memory */ -static ssize_t copy_page_real(void *buf, void *src, size_t csize) +static int copy_from_realmem(void *dest, void *src, size_t count) { - size_t size; + unsigned long size; + int rc; - if (is_vmalloc_addr(buf)) { - BUG_ON(csize >= PAGE_SIZE); - /* If buf is not page aligned, copy first part */ - size = min(roundup(__pa(buf), PAGE_SIZE) - __pa(buf), csize); - if (size) { - if (memcpy_real(load_real_addr(buf), src, size)) - return -EFAULT; - buf += size; - src += size; - } - /* Copy second part */ - size = csize - size; - return (size) ? memcpy_real(load_real_addr(buf), src, size) : 0; - } else { - return memcpy_real(buf, src, csize); - } + if (!count) + return 0; + if (!is_vmalloc_or_module_addr(dest)) + return memcpy_real(dest, src, count); + do { + size = min(count, PAGE_SIZE - (__pa(dest) & ~PAGE_MASK)); + if (memcpy_real(load_real_addr(dest), src, size)) + return -EFAULT; + count -= size; + dest += size; + src += size; + } while (count); + return 0; } /* @@ -114,7 +112,7 @@ static ssize_t copy_oldmem_page_kdump(char *buf, size_t csize, rc = copy_to_user_real((void __force __user *) buf, (void *) src, csize); else - rc = copy_page_real(buf, (void *) src, csize); + rc = copy_from_realmem(buf, (void *) src, csize); return (rc == 0) ? rc : csize; } @@ -210,7 +208,7 @@ int copy_from_oldmem(void *dest, void *src, size_t count) if (OLDMEM_BASE) { if ((unsigned long) src < OLDMEM_SIZE) { copied = min(count, OLDMEM_SIZE - (unsigned long) src); - rc = memcpy_real(dest, src + OLDMEM_BASE, copied); + rc = copy_from_realmem(dest, src + OLDMEM_BASE, copied); if (rc) return rc; } @@ -223,7 +221,7 @@ int copy_from_oldmem(void *dest, void *src, size_t count) return rc; } } - return memcpy_real(dest + copied, src + copied, count - copied); + return copy_from_realmem(dest + copied, src + copied, count - copied); } /* diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index cc30d1fb000c..0dc2b6d0a1ec 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -266,6 +266,7 @@ sysc_sigpending: tm __TI_flags+3(%r12),_TIF_SYSCALL jno sysc_return lm %r2,%r7,__PT_R2(%r11) # load svc arguments + l %r10,__TI_sysc_table(%r12) # 31 bit system call table xr %r8,%r8 # svc 0 returns -ENOSYS clc __PT_INT_CODE+2(2,%r11),BASED(.Lnr_syscalls+2) jnl sysc_nr_ok # invalid svc number -> do svc 0 diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 2b2188b97c6a..e5b43c97a834 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -297,6 +297,7 @@ sysc_sigpending: tm __TI_flags+7(%r12),_TIF_SYSCALL jno sysc_return lmg %r2,%r7,__PT_R2(%r11) # load svc arguments + lg %r10,__TI_sysc_table(%r12) # address of system call table lghi %r8,0 # svc 0 returns -ENOSYS llgh %r1,__PT_INT_CODE+2(%r11) # load new svc number cghi %r1,NR_syscalls diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 0ce9fb245034..d86e64eddb42 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -67,6 +67,11 @@ static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn) case 0xac: /* stnsm */ case 0xad: /* stosm */ return -EINVAL; + case 0xc6: + switch (insn[0] & 0x0f) { + case 0x00: /* exrl */ + return -EINVAL; + } } switch (insn[0]) { case 0x0101: /* pr */ @@ -180,7 +185,6 @@ static int __kprobes is_insn_relative_long(kprobe_opcode_t *insn) break; case 0xc6: switch (insn[0] & 0x0f) { - case 0x00: /* exrl */ case 0x02: /* pfdrl */ case 0x04: /* cghrl */ case 0x05: /* chrl */ diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 3a74d8af0d69..78d967f180f4 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -107,14 +107,13 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) { - int ret, idx; + int ret; /* No virtio-ccw notification? Get out quickly. */ if (!vcpu->kvm->arch.css_support || (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) return -EOPNOTSUPP; - idx = srcu_read_lock(&vcpu->kvm->srcu); /* * The layout is as follows: * - gpr 2 contains the subchannel id (passed as addr) @@ -125,7 +124,6 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) vcpu->run->s.regs.gprs[2], 8, &vcpu->run->s.regs.gprs[3], vcpu->run->s.regs.gprs[4]); - srcu_read_unlock(&vcpu->kvm->srcu, idx); /* * Return cookie in gpr 2, but don't overwrite the register if the diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 99d789e8a018..374a439ccc60 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -18,20 +18,27 @@ #include <asm/uaccess.h> #include "kvm-s390.h" +/* Convert real to absolute address by applying the prefix of the CPU */ +static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu, + unsigned long gaddr) +{ + unsigned long prefix = vcpu->arch.sie_block->prefix; + if (gaddr < 2 * PAGE_SIZE) + gaddr += prefix; + else if (gaddr >= prefix && gaddr < prefix + 2 * PAGE_SIZE) + gaddr -= prefix; + return gaddr; +} + static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu, void __user *gptr, int prefixing) { - unsigned long prefix = vcpu->arch.sie_block->prefix; unsigned long gaddr = (unsigned long) gptr; unsigned long uaddr; - if (prefixing) { - if (gaddr < 2 * PAGE_SIZE) - gaddr += prefix; - else if ((gaddr >= prefix) && (gaddr < prefix + 2 * PAGE_SIZE)) - gaddr -= prefix; - } + if (prefixing) + gaddr = kvm_s390_real_to_abs(vcpu, gaddr); uaddr = gmap_fault(gaddr, vcpu->arch.gmap); if (IS_ERR_VALUE(uaddr)) uaddr = -EFAULT; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 5ee56e5acc23..5ddbbde6f65c 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -62,12 +62,6 @@ static int handle_stop(struct kvm_vcpu *vcpu) trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits); - if (vcpu->arch.local_int.action_bits & ACTION_RELOADVCPU_ON_STOP) { - vcpu->arch.local_int.action_bits &= ~ACTION_RELOADVCPU_ON_STOP; - rc = SIE_INTERCEPT_RERUNVCPU; - vcpu->run->exit_reason = KVM_EXIT_INTR; - } - if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) { atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 7f35cb33e510..e7323cd9f109 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -436,6 +436,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime); no_timer: + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); spin_lock(&vcpu->arch.local_int.float_int->lock); spin_lock_bh(&vcpu->arch.local_int.lock); add_wait_queue(&vcpu->wq, &wait); @@ -455,6 +456,8 @@ no_timer: remove_wait_queue(&vcpu->wq, &wait); spin_unlock_bh(&vcpu->arch.local_int.lock); spin_unlock(&vcpu->arch.local_int.float_int->lock); + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); return 0; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 776dafe918db..bedda67cc222 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -689,9 +689,9 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) return 0; } -static int __vcpu_run(struct kvm_vcpu *vcpu) +static int vcpu_pre_run(struct kvm_vcpu *vcpu) { - int rc; + int rc, cpuflags; memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); @@ -709,28 +709,24 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) return rc; vcpu->arch.sie_block->icptcode = 0; - VCPU_EVENT(vcpu, 6, "entering sie flags %x", - atomic_read(&vcpu->arch.sie_block->cpuflags)); - trace_kvm_s390_sie_enter(vcpu, - atomic_read(&vcpu->arch.sie_block->cpuflags)); + cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); + VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); + trace_kvm_s390_sie_enter(vcpu, cpuflags); - /* - * As PF_VCPU will be used in fault handler, between guest_enter - * and guest_exit should be no uaccess. - */ - preempt_disable(); - kvm_guest_enter(); - preempt_enable(); - rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); - kvm_guest_exit(); + return 0; +} + +static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) +{ + int rc; VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", vcpu->arch.sie_block->icptcode); trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); - if (rc > 0) + if (exit_reason >= 0) { rc = 0; - if (rc < 0) { + } else { if (kvm_is_ucontrol(vcpu->kvm)) { rc = SIE_INTERCEPT_UCONTROL; } else { @@ -741,6 +737,49 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) } memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); + + if (rc == 0) { + if (kvm_is_ucontrol(vcpu->kvm)) + rc = -EOPNOTSUPP; + else + rc = kvm_handle_sie_intercept(vcpu); + } + + return rc; +} + +static int __vcpu_run(struct kvm_vcpu *vcpu) +{ + int rc, exit_reason; + + /* + * We try to hold kvm->srcu during most of vcpu_run (except when run- + * ning the guest), so that memslots (and other stuff) are protected + */ + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + + do { + rc = vcpu_pre_run(vcpu); + if (rc) + break; + + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + /* + * As PF_VCPU will be used in fault handler, between + * guest_enter and guest_exit should be no uaccess. + */ + preempt_disable(); + kvm_guest_enter(); + preempt_enable(); + exit_reason = sie64a(vcpu->arch.sie_block, + vcpu->run->s.regs.gprs); + kvm_guest_exit(); + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + + rc = vcpu_post_run(vcpu, exit_reason); + } while (!signal_pending(current) && !rc); + + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); return rc; } @@ -749,7 +788,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int rc; sigset_t sigsaved; -rerun_vcpu: if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); @@ -782,19 +820,7 @@ rerun_vcpu: } might_fault(); - - do { - rc = __vcpu_run(vcpu); - if (rc) - break; - if (kvm_is_ucontrol(vcpu->kvm)) - rc = -EOPNOTSUPP; - else - rc = kvm_handle_sie_intercept(vcpu); - } while (!signal_pending(current) && !rc); - - if (rc == SIE_INTERCEPT_RERUNVCPU) - goto rerun_vcpu; + rc = __vcpu_run(vcpu); if (signal_pending(current) && !rc) { kvm_run->exit_reason = KVM_EXIT_INTR; @@ -951,6 +977,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, { struct kvm_vcpu *vcpu = filp->private_data; void __user *argp = (void __user *)arg; + int idx; long r; switch (ioctl) { @@ -964,7 +991,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } case KVM_S390_STORE_STATUS: + idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvm_s390_vcpu_store_status(vcpu, arg); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; case KVM_S390_SET_INITIAL_PSW: { psw_t psw; @@ -1060,12 +1089,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index dc99f1ca4267..b44912a32949 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -28,8 +28,7 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); extern unsigned long *vfacilities; /* negativ values are error codes, positive values for internal conditions */ -#define SIE_INTERCEPT_RERUNVCPU (1<<0) -#define SIE_INTERCEPT_UCONTROL (1<<1) +#define SIE_INTERCEPT_UCONTROL (1<<0) int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ @@ -91,8 +90,10 @@ static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2) { - *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20; - *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; + if (r1) + *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20; + if (r2) + *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; } static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 59200ee275e5..2440602e6df1 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -30,6 +30,38 @@ #include "kvm-s390.h" #include "trace.h" +/* Handle SCK (SET CLOCK) interception */ +static int handle_set_clock(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu *cpup; + s64 hostclk, val; + u64 op2; + int i; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + op2 = kvm_s390_get_base_disp_s(vcpu); + if (op2 & 7) /* Operand must be on a doubleword boundary */ + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + if (get_guest(vcpu, val, (u64 __user *) op2)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + if (store_tod_clock(&hostclk)) { + kvm_s390_set_psw_cc(vcpu, 3); + return 0; + } + val = (val - hostclk) & ~0x3fUL; + + mutex_lock(&vcpu->kvm->lock); + kvm_for_each_vcpu(i, cpup, vcpu->kvm) + cpup->arch.sie_block->epoch = val; + mutex_unlock(&vcpu->kvm->lock); + + kvm_s390_set_psw_cc(vcpu, 0); + return 0; +} + static int handle_set_prefix(struct kvm_vcpu *vcpu) { u64 operand2; @@ -128,6 +160,33 @@ static int handle_skey(struct kvm_vcpu *vcpu) return 0; } +static int handle_test_block(struct kvm_vcpu *vcpu) +{ + unsigned long hva; + gpa_t addr; + int reg2; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + kvm_s390_get_regs_rre(vcpu, NULL, ®2); + addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + addr = kvm_s390_real_to_abs(vcpu, addr); + + hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr)); + if (kvm_is_error_hva(hva)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + /* + * We don't expect errors on modern systems, and do not care + * about storage keys (yet), so let's just clear the page. + */ + if (clear_user((void __user *)hva, PAGE_SIZE) != 0) + return -EFAULT; + kvm_s390_set_psw_cc(vcpu, 0); + vcpu->run->s.regs.gprs[0] = 0; + return 0; +} + static int handle_tpi(struct kvm_vcpu *vcpu) { struct kvm_s390_interrupt_info *inti; @@ -438,12 +497,14 @@ out_exception: static const intercept_handler_t b2_handlers[256] = { [0x02] = handle_stidp, + [0x04] = handle_set_clock, [0x10] = handle_set_prefix, [0x11] = handle_store_prefix, [0x12] = handle_store_cpu_address, [0x29] = handle_skey, [0x2a] = handle_skey, [0x2b] = handle_skey, + [0x2c] = handle_test_block, [0x30] = handle_io_inst, [0x31] = handle_io_inst, [0x32] = handle_io_inst, diff --git a/arch/score/Kconfig b/arch/score/Kconfig index a1be70db75fe..305f7ee1f382 100644 --- a/arch/score/Kconfig +++ b/arch/score/Kconfig @@ -2,6 +2,7 @@ menu "Machine selection" config SCORE def_bool y + select HAVE_GENERIC_HARDIRQS select GENERIC_IRQ_SHOW select GENERIC_IOMAP select GENERIC_ATOMIC64 @@ -110,3 +111,6 @@ source "security/Kconfig" source "crypto/Kconfig" source "lib/Kconfig" + +config NO_IOMEM + def_bool y diff --git a/arch/score/Makefile b/arch/score/Makefile index 974aefe86123..9e3e060290e0 100644 --- a/arch/score/Makefile +++ b/arch/score/Makefile @@ -20,8 +20,8 @@ cflags-y += -G0 -pipe -mel -mnhwloop -D__SCOREEL__ \ # KBUILD_AFLAGS += $(cflags-y) KBUILD_CFLAGS += $(cflags-y) -KBUILD_AFLAGS_MODULE += -mlong-calls -KBUILD_CFLAGS_MODULE += -mlong-calls +KBUILD_AFLAGS_MODULE += +KBUILD_CFLAGS_MODULE += LDFLAGS += --oformat elf32-littlescore LDFLAGS_vmlinux += -G0 -static -nostdlib diff --git a/arch/score/include/asm/checksum.h b/arch/score/include/asm/checksum.h index f909ac3144a4..961bd64015a8 100644 --- a/arch/score/include/asm/checksum.h +++ b/arch/score/include/asm/checksum.h @@ -184,48 +184,57 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, __wsum sum) { __asm__ __volatile__( - ".set\tnoreorder\t\t\t# csum_ipv6_magic\n\t" - ".set\tnoat\n\t" - "addu\t%0, %5\t\t\t# proto (long in network byte order)\n\t" - "sltu\t$1, %0, %5\n\t" - "addu\t%0, $1\n\t" - "addu\t%0, %6\t\t\t# csum\n\t" - "sltu\t$1, %0, %6\n\t" - "lw\t%1, 0(%2)\t\t\t# four words source address\n\t" - "addu\t%0, $1\n\t" - "addu\t%0, %1\n\t" - "sltu\t$1, %0, %1\n\t" - "lw\t%1, 4(%2)\n\t" - "addu\t%0, $1\n\t" - "addu\t%0, %1\n\t" - "sltu\t$1, %0, %1\n\t" - "lw\t%1, 8(%2)\n\t" - "addu\t%0, $1\n\t" - "addu\t%0, %1\n\t" - "sltu\t$1, %0, %1\n\t" - "lw\t%1, 12(%2)\n\t" - "addu\t%0, $1\n\t" - "addu\t%0, %1\n\t" - "sltu\t$1, %0, %1\n\t" - "lw\t%1, 0(%3)\n\t" - "addu\t%0, $1\n\t" - "addu\t%0, %1\n\t" - "sltu\t$1, %0, %1\n\t" - "lw\t%1, 4(%3)\n\t" - "addu\t%0, $1\n\t" - "addu\t%0, %1\n\t" - "sltu\t$1, %0, %1\n\t" - "lw\t%1, 8(%3)\n\t" - "addu\t%0, $1\n\t" - "addu\t%0, %1\n\t" - "sltu\t$1, %0, %1\n\t" - "lw\t%1, 12(%3)\n\t" - "addu\t%0, $1\n\t" - "addu\t%0, %1\n\t" - "sltu\t$1, %0, %1\n\t" - "addu\t%0, $1\t\t\t# Add final carry\n\t" - ".set\tnoat\n\t" - ".set\tnoreorder" + ".set\tvolatile\t\t\t# csum_ipv6_magic\n\t" + "add\t%0, %0, %5\t\t\t# proto (long in network byte order)\n\t" + "cmp.c\t%5, %0\n\t" + "bleu 1f\n\t" + "addi\t%0, 0x1\n\t" + "1:add\t%0, %0, %6\t\t\t# csum\n\t" + "cmp.c\t%6, %0\n\t" + "lw\t%1, [%2, 0]\t\t\t# four words source address\n\t" + "bleu 1f\n\t" + "addi\t%0, 0x1\n\t" + "1:add\t%0, %0, %1\n\t" + "cmp.c\t%1, %0\n\t" + "1:lw\t%1, [%2, 4]\n\t" + "bleu 1f\n\t" + "addi\t%0, 0x1\n\t" + "1:add\t%0, %0, %1\n\t" + "cmp.c\t%1, %0\n\t" + "lw\t%1, [%2,8]\n\t" + "bleu 1f\n\t" + "addi\t%0, 0x1\n\t" + "1:add\t%0, %0, %1\n\t" + "cmp.c\t%1, %0\n\t" + "lw\t%1, [%2, 12]\n\t" + "bleu 1f\n\t" + "addi\t%0, 0x1\n\t" + "1:add\t%0, %0,%1\n\t" + "cmp.c\t%1, %0\n\t" + "lw\t%1, [%3, 0]\n\t" + "bleu 1f\n\t" + "addi\t%0, 0x1\n\t" + "1:add\t%0, %0, %1\n\t" + "cmp.c\t%1, %0\n\t" + "lw\t%1, [%3, 4]\n\t" + "bleu 1f\n\t" + "addi\t%0, 0x1\n\t" + "1:add\t%0, %0, %1\n\t" + "cmp.c\t%1, %0\n\t" + "lw\t%1, [%3, 8]\n\t" + "bleu 1f\n\t" + "addi\t%0, 0x1\n\t" + "1:add\t%0, %0, %1\n\t" + "cmp.c\t%1, %0\n\t" + "lw\t%1, [%3, 12]\n\t" + "bleu 1f\n\t" + "addi\t%0, 0x1\n\t" + "1:add\t%0, %0, %1\n\t" + "cmp.c\t%1, %0\n\t" + "bleu 1f\n\t" + "addi\t%0, 0x1\n\t" + "1:\n\t" + ".set\toptimize" : "=r" (sum), "=r" (proto) : "r" (saddr), "r" (daddr), "0" (htonl(len)), "1" (htonl(proto)), "r" (sum)); diff --git a/arch/score/include/asm/io.h b/arch/score/include/asm/io.h index fbbfd7132e3b..574c8827abe2 100644 --- a/arch/score/include/asm/io.h +++ b/arch/score/include/asm/io.h @@ -5,5 +5,4 @@ #define virt_to_bus virt_to_phys #define bus_to_virt phys_to_virt - #endif /* _ASM_SCORE_IO_H */ diff --git a/arch/score/include/asm/pgalloc.h b/arch/score/include/asm/pgalloc.h index 059a61b7071b..716b3fd1d863 100644 --- a/arch/score/include/asm/pgalloc.h +++ b/arch/score/include/asm/pgalloc.h @@ -2,7 +2,7 @@ #define _ASM_SCORE_PGALLOC_H #include <linux/mm.h> - +#include <linux/highmem.h> static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { diff --git a/arch/score/kernel/entry.S b/arch/score/kernel/entry.S index 7234ed09b7b7..befb87d30a89 100644 --- a/arch/score/kernel/entry.S +++ b/arch/score/kernel/entry.S @@ -264,7 +264,7 @@ resume_kernel: disable_irq lw r8, [r28, TI_PRE_COUNT] cmpz.c r8 - bne r8, restore_all + bne restore_all need_resched: lw r8, [r28, TI_FLAGS] andri.c r9, r8, _TIF_NEED_RESCHED @@ -415,7 +415,7 @@ ENTRY(handle_sys) sw r9, [r0, PT_EPC] cmpi.c r27, __NR_syscalls # check syscall number - bgeu illegal_syscall + bcs illegal_syscall slli r8, r27, 2 # get syscall routine la r11, sys_call_table diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c index f4c6d02421d3..a1519ad3d49d 100644 --- a/arch/score/kernel/process.c +++ b/arch/score/kernel/process.c @@ -78,8 +78,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.reg0 = (unsigned long) childregs; if (unlikely(p->flags & PF_KTHREAD)) { memset(childregs, 0, sizeof(struct pt_regs)); - p->thread->reg12 = usp; - p->thread->reg13 = arg; + p->thread.reg12 = usp; + p->thread.reg13 = arg; p->thread.reg3 = (unsigned long) ret_from_kernel_thread; } else { *childregs = *current_pt_regs(); diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 2137ad667438..78c4fdb91bc5 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -506,12 +506,17 @@ config SUN_OPENPROMFS Only choose N if you know in advance that you will not need to modify OpenPROM settings on the running system. -# Makefile helper +# Makefile helpers config SPARC64_PCI bool default y depends on SPARC64 && PCI +config SPARC64_PCI_MSI + bool + default y + depends on SPARC64_PCI && PCI_MSI + endmenu menu "Executable file formats" diff --git a/arch/sparc/include/asm/floppy_64.h b/arch/sparc/include/asm/floppy_64.h index e204f902e6c9..7c90c50c200d 100644 --- a/arch/sparc/include/asm/floppy_64.h +++ b/arch/sparc/include/asm/floppy_64.h @@ -254,7 +254,7 @@ static int sun_fd_request_irq(void) once = 1; error = request_irq(FLOPPY_IRQ, sparc_floppy_irq, - IRQF_DISABLED, "floppy", NULL); + 0, "floppy", NULL); return ((error == 0) ? 0 : -1); } diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index 5080d16a832f..ec2e2e2aba7d 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -9,7 +9,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:\n\t" + asm_volatile_goto("1:\n\t" "nop\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index d432fb20358e..d15cc1794b0e 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -1,3 +1,4 @@ + # # Makefile for the linux kernel. # @@ -99,7 +100,7 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_SPARC64_PCI) += pci.o pci_common.o psycho_common.o obj-$(CONFIG_SPARC64_PCI) += pci_psycho.o pci_sabre.o pci_schizo.o obj-$(CONFIG_SPARC64_PCI) += pci_sun4v.o pci_sun4v_asm.o pci_fire.o -obj-$(CONFIG_PCI_MSI) += pci_msi.o +obj-$(CONFIG_SPARC64_PCI_MSI) += pci_msi.o obj-$(CONFIG_COMPAT) += sys32.o sys_sparc32.o signal32.o diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index 62d6b153ffa2..dff60abbea01 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -849,9 +849,8 @@ void ldom_reboot(const char *boot_command) if (boot_command && strlen(boot_command)) { unsigned long len; - strcpy(full_boot_str, "boot "); - strlcpy(full_boot_str + strlen("boot "), boot_command, - sizeof(full_boot_str + strlen("boot "))); + snprintf(full_boot_str, sizeof(full_boot_str), "boot %s", + boot_command); len = strlen(full_boot_str); if (reboot_data_supported) { diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c index 54df554b82d9..e01d75d40329 100644 --- a/arch/sparc/kernel/ldc.c +++ b/arch/sparc/kernel/ldc.c @@ -1249,12 +1249,12 @@ int ldc_bind(struct ldc_channel *lp, const char *name) snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name); snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name); - err = request_irq(lp->cfg.rx_irq, ldc_rx, IRQF_DISABLED, + err = request_irq(lp->cfg.rx_irq, ldc_rx, 0, lp->rx_irq_name, lp); if (err) return err; - err = request_irq(lp->cfg.tx_irq, ldc_tx, IRQF_DISABLED, + err = request_irq(lp->cfg.tx_irq, ldc_tx, 0, lp->tx_irq_name, lp); if (err) { free_irq(lp->cfg.rx_irq, lp); diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h index d385eaadece7..709798460763 100644 --- a/arch/tile/include/asm/atomic.h +++ b/arch/tile/include/asm/atomic.h @@ -166,7 +166,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int o, int n) * * Atomically sets @v to @i and returns old @v */ -static inline u64 atomic64_xchg(atomic64_t *v, u64 n) +static inline long long atomic64_xchg(atomic64_t *v, long long n) { return xchg64(&v->counter, n); } @@ -180,7 +180,8 @@ static inline u64 atomic64_xchg(atomic64_t *v, u64 n) * Atomically checks if @v holds @o and replaces it with @n if so. * Returns the old value at @v. */ -static inline u64 atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n) +static inline long long atomic64_cmpxchg(atomic64_t *v, long long o, + long long n) { return cmpxchg64(&v->counter, o, n); } diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index 0d0395b1b152..1ad4a1f7d42b 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -80,7 +80,7 @@ static inline void atomic_set(atomic_t *v, int n) /* A 64bit atomic type */ typedef struct { - u64 __aligned(8) counter; + long long counter; } atomic64_t; #define ATOMIC64_INIT(val) { (val) } @@ -91,14 +91,14 @@ typedef struct { * * Atomically reads the value of @v. */ -static inline u64 atomic64_read(const atomic64_t *v) +static inline long long atomic64_read(const atomic64_t *v) { /* * Requires an atomic op to read both 32-bit parts consistently. * Casting away const is safe since the atomic support routines * do not write to memory if the value has not been modified. */ - return _atomic64_xchg_add((u64 *)&v->counter, 0); + return _atomic64_xchg_add((long long *)&v->counter, 0); } /** @@ -108,7 +108,7 @@ static inline u64 atomic64_read(const atomic64_t *v) * * Atomically adds @i to @v. */ -static inline void atomic64_add(u64 i, atomic64_t *v) +static inline void atomic64_add(long long i, atomic64_t *v) { _atomic64_xchg_add(&v->counter, i); } @@ -120,7 +120,7 @@ static inline void atomic64_add(u64 i, atomic64_t *v) * * Atomically adds @i to @v and returns @i + @v */ -static inline u64 atomic64_add_return(u64 i, atomic64_t *v) +static inline long long atomic64_add_return(long long i, atomic64_t *v) { smp_mb(); /* barrier for proper semantics */ return _atomic64_xchg_add(&v->counter, i) + i; @@ -135,7 +135,8 @@ static inline u64 atomic64_add_return(u64 i, atomic64_t *v) * Atomically adds @a to @v, so long as @v was not already @u. * Returns non-zero if @v was not @u, and zero otherwise. */ -static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u) +static inline long long atomic64_add_unless(atomic64_t *v, long long a, + long long u) { smp_mb(); /* barrier for proper semantics */ return _atomic64_xchg_add_unless(&v->counter, a, u) != u; @@ -151,7 +152,7 @@ static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u) * atomic64_set() can't be just a raw store, since it would be lost if it * fell between the load and store of one of the other atomic ops. */ -static inline void atomic64_set(atomic64_t *v, u64 n) +static inline void atomic64_set(atomic64_t *v, long long n) { _atomic64_xchg(&v->counter, n); } @@ -236,11 +237,13 @@ extern struct __get_user __atomic_xchg_add_unless(volatile int *p, extern struct __get_user __atomic_or(volatile int *p, int *lock, int n); extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n); extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n); -extern u64 __atomic64_cmpxchg(volatile u64 *p, int *lock, u64 o, u64 n); -extern u64 __atomic64_xchg(volatile u64 *p, int *lock, u64 n); -extern u64 __atomic64_xchg_add(volatile u64 *p, int *lock, u64 n); -extern u64 __atomic64_xchg_add_unless(volatile u64 *p, - int *lock, u64 o, u64 n); +extern long long __atomic64_cmpxchg(volatile long long *p, int *lock, + long long o, long long n); +extern long long __atomic64_xchg(volatile long long *p, int *lock, long long n); +extern long long __atomic64_xchg_add(volatile long long *p, int *lock, + long long n); +extern long long __atomic64_xchg_add_unless(volatile long long *p, + int *lock, long long o, long long n); /* Return failure from the atomic wrappers. */ struct __get_user __atomic_bad_address(int __user *addr); diff --git a/arch/tile/include/asm/cmpxchg.h b/arch/tile/include/asm/cmpxchg.h index 4001d5eab4bb..0ccda3c425be 100644 --- a/arch/tile/include/asm/cmpxchg.h +++ b/arch/tile/include/asm/cmpxchg.h @@ -35,10 +35,10 @@ int _atomic_xchg(int *ptr, int n); int _atomic_xchg_add(int *v, int i); int _atomic_xchg_add_unless(int *v, int a, int u); int _atomic_cmpxchg(int *ptr, int o, int n); -u64 _atomic64_xchg(u64 *v, u64 n); -u64 _atomic64_xchg_add(u64 *v, u64 i); -u64 _atomic64_xchg_add_unless(u64 *v, u64 a, u64 u); -u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n); +long long _atomic64_xchg(long long *v, long long n); +long long _atomic64_xchg_add(long long *v, long long i); +long long _atomic64_xchg_add_unless(long long *v, long long a, long long u); +long long _atomic64_cmpxchg(long long *v, long long o, long long n); #define xchg(ptr, n) \ ({ \ @@ -53,7 +53,8 @@ u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n); if (sizeof(*(ptr)) != 4) \ __cmpxchg_called_with_bad_pointer(); \ smp_mb(); \ - (typeof(*(ptr)))_atomic_cmpxchg((int *)ptr, (int)o, (int)n); \ + (typeof(*(ptr)))_atomic_cmpxchg((int *)ptr, (int)o, \ + (int)n); \ }) #define xchg64(ptr, n) \ @@ -61,7 +62,8 @@ u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n); if (sizeof(*(ptr)) != 8) \ __xchg_called_with_bad_pointer(); \ smp_mb(); \ - (typeof(*(ptr)))_atomic64_xchg((u64 *)(ptr), (u64)(n)); \ + (typeof(*(ptr)))_atomic64_xchg((long long *)(ptr), \ + (long long)(n)); \ }) #define cmpxchg64(ptr, o, n) \ @@ -69,7 +71,8 @@ u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n); if (sizeof(*(ptr)) != 8) \ __cmpxchg_called_with_bad_pointer(); \ smp_mb(); \ - (typeof(*(ptr)))_atomic64_cmpxchg((u64 *)ptr, (u64)o, (u64)n); \ + (typeof(*(ptr)))_atomic64_cmpxchg((long long *)ptr, \ + (long long)o, (long long)n); \ }) #else @@ -81,10 +84,11 @@ u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n); switch (sizeof(*(ptr))) { \ case 4: \ __x = (typeof(__x))(unsigned long) \ - __insn_exch4((ptr), (u32)(unsigned long)(n)); \ + __insn_exch4((ptr), \ + (u32)(unsigned long)(n)); \ break; \ case 8: \ - __x = (typeof(__x)) \ + __x = (typeof(__x)) \ __insn_exch((ptr), (unsigned long)(n)); \ break; \ default: \ @@ -103,10 +107,12 @@ u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n); switch (sizeof(*(ptr))) { \ case 4: \ __x = (typeof(__x))(unsigned long) \ - __insn_cmpexch4((ptr), (u32)(unsigned long)(n)); \ + __insn_cmpexch4((ptr), \ + (u32)(unsigned long)(n)); \ break; \ case 8: \ - __x = (typeof(__x))__insn_cmpexch((ptr), (u64)(n)); \ + __x = (typeof(__x))__insn_cmpexch((ptr), \ + (long long)(n)); \ break; \ default: \ __cmpxchg_called_with_bad_pointer(); \ diff --git a/arch/tile/include/asm/percpu.h b/arch/tile/include/asm/percpu.h index 63294f5a8efb..4f7ae39fa202 100644 --- a/arch/tile/include/asm/percpu.h +++ b/arch/tile/include/asm/percpu.h @@ -15,9 +15,37 @@ #ifndef _ASM_TILE_PERCPU_H #define _ASM_TILE_PERCPU_H -register unsigned long __my_cpu_offset __asm__("tp"); -#define __my_cpu_offset __my_cpu_offset -#define set_my_cpu_offset(tp) (__my_cpu_offset = (tp)) +register unsigned long my_cpu_offset_reg asm("tp"); + +#ifdef CONFIG_PREEMPT +/* + * For full preemption, we can't just use the register variable + * directly, since we need barrier() to hazard against it, causing the + * compiler to reload anything computed from a previous "tp" value. + * But we also don't want to use volatile asm, since we'd like the + * compiler to be able to cache the value across multiple percpu reads. + * So we use a fake stack read as a hazard against barrier(). + * The 'U' constraint is like 'm' but disallows postincrement. + */ +static inline unsigned long __my_cpu_offset(void) +{ + unsigned long tp; + register unsigned long *sp asm("sp"); + asm("move %0, tp" : "=r" (tp) : "U" (*sp)); + return tp; +} +#define __my_cpu_offset __my_cpu_offset() +#else +/* + * We don't need to hazard against barrier() since "tp" doesn't ever + * change with PREEMPT_NONE, and with PREEMPT_VOLUNTARY it only + * changes at function call points, at which we are already re-reading + * the value of "tp" due to "my_cpu_offset_reg" being a global variable. + */ +#define __my_cpu_offset my_cpu_offset_reg +#endif + +#define set_my_cpu_offset(tp) (my_cpu_offset_reg = (tp)) #include <asm-generic/percpu.h> diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c index df27a1fd94a3..531f4c365351 100644 --- a/arch/tile/kernel/hardwall.c +++ b/arch/tile/kernel/hardwall.c @@ -66,7 +66,7 @@ static struct hardwall_type hardwall_types[] = { 0, "udn", LIST_HEAD_INIT(hardwall_types[HARDWALL_UDN].list), - __SPIN_LOCK_INITIALIZER(hardwall_types[HARDWALL_UDN].lock), + __SPIN_LOCK_UNLOCKED(hardwall_types[HARDWALL_UDN].lock), NULL }, #ifndef __tilepro__ @@ -77,7 +77,7 @@ static struct hardwall_type hardwall_types[] = { 1, /* disabled pending hypervisor support */ "idn", LIST_HEAD_INIT(hardwall_types[HARDWALL_IDN].list), - __SPIN_LOCK_INITIALIZER(hardwall_types[HARDWALL_IDN].lock), + __SPIN_LOCK_UNLOCKED(hardwall_types[HARDWALL_IDN].lock), NULL }, { /* access to user-space IPI */ @@ -87,7 +87,7 @@ static struct hardwall_type hardwall_types[] = { 0, "ipi", LIST_HEAD_INIT(hardwall_types[HARDWALL_IPI].list), - __SPIN_LOCK_INITIALIZER(hardwall_types[HARDWALL_IPI].lock), + __SPIN_LOCK_UNLOCKED(hardwall_types[HARDWALL_IPI].lock), NULL }, #endif diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 088d5c141e68..2cbe6d5dd6b0 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -815,6 +815,9 @@ STD_ENTRY(interrupt_return) } bzt r28, 1f bnz r29, 1f + /* Disable interrupts explicitly for preemption. */ + IRQ_DISABLE(r20,r21) + TRACE_IRQS_OFF jal preempt_schedule_irq FEEDBACK_REENTER(interrupt_return) 1: diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index ec755d3f3734..b8fc497f2437 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -841,6 +841,9 @@ STD_ENTRY(interrupt_return) } beqzt r28, 1f bnez r29, 1f + /* Disable interrupts explicitly for preemption. */ + IRQ_DISABLE(r20,r21) + TRACE_IRQS_OFF jal preempt_schedule_irq FEEDBACK_REENTER(interrupt_return) 1: diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index 362284af3afd..c93977a62116 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -23,6 +23,7 @@ #include <linux/mmzone.h> #include <linux/dcache.h> #include <linux/fs.h> +#include <linux/string.h> #include <asm/backtrace.h> #include <asm/page.h> #include <asm/ucontext.h> @@ -332,21 +333,18 @@ static void describe_addr(struct KBacktraceIterator *kbt, } if (vma->vm_file) { - char *s; p = d_path(&vma->vm_file->f_path, buf, bufsize); if (IS_ERR(p)) p = "?"; - s = strrchr(p, '/'); - if (s) - p = s+1; + name = kbasename(p); } else { - p = "anon"; + name = "anon"; } /* Generate a string description of the vma info. */ - namelen = strlen(p); + namelen = strlen(name); remaining = (bufsize - 1) - namelen; - memmove(buf, p, namelen); + memmove(buf, name, namelen); snprintf(buf + namelen, remaining, "[%lx+%lx] ", vma->vm_start, vma->vm_end - vma->vm_start); } diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c index 759efa337be8..c89b211fd9e7 100644 --- a/arch/tile/lib/atomic_32.c +++ b/arch/tile/lib/atomic_32.c @@ -107,19 +107,19 @@ unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask) EXPORT_SYMBOL(_atomic_xor); -u64 _atomic64_xchg(u64 *v, u64 n) +long long _atomic64_xchg(long long *v, long long n) { return __atomic64_xchg(v, __atomic_setup(v), n); } EXPORT_SYMBOL(_atomic64_xchg); -u64 _atomic64_xchg_add(u64 *v, u64 i) +long long _atomic64_xchg_add(long long *v, long long i) { return __atomic64_xchg_add(v, __atomic_setup(v), i); } EXPORT_SYMBOL(_atomic64_xchg_add); -u64 _atomic64_xchg_add_unless(u64 *v, u64 a, u64 u) +long long _atomic64_xchg_add_unless(long long *v, long long a, long long u) { /* * Note: argument order is switched here since it is easier @@ -130,7 +130,7 @@ u64 _atomic64_xchg_add_unless(u64 *v, u64 a, u64 u) } EXPORT_SYMBOL(_atomic64_xchg_add_unless); -u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n) +long long _atomic64_cmpxchg(long long *v, long long o, long long n) { return __atomic64_cmpxchg(v, __atomic_setup(v), o, n); } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ee2fb9d37745..145d703227bf 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -860,7 +860,7 @@ source "kernel/Kconfig.preempt" config X86_UP_APIC bool "Local APIC support on uniprocessors" - depends on X86_32 && !SMP && !X86_32_NON_STANDARD + depends on X86_32 && !SMP && !X86_32_NON_STANDARD && !PCI_MSI ---help--- A local APIC (Advanced Programmable Interrupt Controller) is an integrated interrupt controller in the CPU. If you have a single-CPU @@ -885,11 +885,11 @@ config X86_UP_IOAPIC config X86_LOCAL_APIC def_bool y - depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC + depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI config X86_IO_APIC def_bool y - depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC + depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC || PCI_MSI config X86_VISWS_APIC def_bool y diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index d3f5c63078d8..89270b4318db 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -374,7 +374,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) * Catch too early usage of this before alternatives * have run. */ - asm goto("1: jmp %l[t_warn]\n" + asm_volatile_goto("1: jmp %l[t_warn]\n" "2:\n" ".section .altinstructions,\"a\"\n" " .long 1b - .\n" @@ -388,7 +388,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) #endif - asm goto("1: jmp %l[t_no]\n" + asm_volatile_goto("1: jmp %l[t_no]\n" "2:\n" ".section .altinstructions,\"a\"\n" " .long 1b - .\n" @@ -453,7 +453,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit) * have. Thus, we force the jump to the widest, 4-byte, signed relative * offset even though the last would often fit in less bytes. */ - asm goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n" + asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n" "2:\n" ".section .altinstructions,\"a\"\n" " .long 1b - .\n" /* src offset */ diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 64507f35800c..6a2cefb4395a 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -18,7 +18,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:" + asm_volatile_goto("1:" ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t" ".pushsection __jump_table, \"aw\" \n\t" _ASM_ALIGN "\n\t" diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 15f960c06ff7..24ec1216596e 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -274,13 +274,17 @@ struct x86_emulate_ctxt { bool guest_mode; /* guest running a nested guest */ bool perm_ok; /* do not check permissions if true */ - bool only_vendor_specific_insn; + bool ud; /* inject an #UD if host doesn't support insn */ bool have_exception; struct x86_exception exception; - /* decode cache */ - u8 twobyte; + /* + * decode cache + */ + + /* current opcode length in bytes */ + u8 opcode_len; u8 b; u8 intercept; u8 lock_prefix; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c76ff74a98f2..ae5d7830855c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -79,6 +79,13 @@ #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) +static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) +{ + /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */ + return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - + (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); +} + #define SELECTOR_TI_MASK (1 << 2) #define SELECTOR_RPL_MASK 0x03 @@ -253,7 +260,6 @@ struct kvm_pio_request { * mode. */ struct kvm_mmu { - void (*new_cr3)(struct kvm_vcpu *vcpu); void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); @@ -261,7 +267,6 @@ struct kvm_mmu { bool prefault); void (*inject_page_fault)(struct kvm_vcpu *vcpu, struct x86_exception *fault); - void (*free)(struct kvm_vcpu *vcpu); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, struct x86_exception *exception); gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); @@ -389,6 +394,8 @@ struct kvm_vcpu_arch { struct fpu guest_fpu; u64 xcr0; + u64 guest_supported_xcr0; + u32 guest_xstate_size; struct kvm_pio_request pio; void *pio_data; @@ -557,7 +564,9 @@ struct kvm_arch { struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; - int iommu_flags; + bool iommu_noncoherent; +#define __KVM_HAVE_ARCH_NONCOHERENT_DMA + atomic_t noncoherent_dma_count; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; @@ -780,11 +789,11 @@ void kvm_mmu_module_exit(void); void kvm_mmu_destroy(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu); -int kvm_mmu_setup(struct kvm_vcpu *vcpu); +void kvm_mmu_setup(struct kvm_vcpu *vcpu); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, u64 dirty_mask, u64 nx_mask, u64 x_mask); -int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); +void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, @@ -922,13 +931,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, void *insn, int insn_len); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); +void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu); void kvm_enable_tdp(void); void kvm_disable_tdp(void); -int complete_pio(struct kvm_vcpu *vcpu); -bool kvm_check_iopl(struct kvm_vcpu *vcpu); - static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) { return gpa; diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h index e7e6751648ed..07537a44216e 100644 --- a/arch/x86/include/asm/mutex_64.h +++ b/arch/x86/include/asm/mutex_64.h @@ -20,7 +20,7 @@ static inline void __mutex_fastpath_lock(atomic_t *v, void (*fail_fn)(atomic_t *)) { - asm volatile goto(LOCK_PREFIX " decl %0\n" + asm_volatile_goto(LOCK_PREFIX " decl %0\n" " jns %l[exit]\n" : : "m" (v->counter) : "memory", "cc" @@ -75,7 +75,7 @@ static inline int __mutex_fastpath_lock_retval(atomic_t *count) static inline void __mutex_fastpath_unlock(atomic_t *v, void (*fail_fn)(atomic_t *)) { - asm volatile goto(LOCK_PREFIX " incl %0\n" + asm_volatile_goto(LOCK_PREFIX " incl %0\n" " jg %l[exit]\n" : : "m" (v->counter) : "memory", "cc" diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index be8269b00e2a..d6b078e9fa28 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -14,6 +14,8 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall, struct timespec *ts); void pvclock_resume(void); +void pvclock_touch_watchdogs(void); + /* * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, * yielding a 64-bit result. diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 6aef9fbc09b7..b913915e8e63 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -79,30 +79,38 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn) return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY; } -static inline unsigned long mfn_to_pfn(unsigned long mfn) +static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn) { unsigned long pfn; - int ret = 0; + int ret; if (xen_feature(XENFEAT_auto_translated_physmap)) return mfn; - if (unlikely(mfn >= machine_to_phys_nr)) { - pfn = ~0; - goto try_override; - } - pfn = 0; + if (unlikely(mfn >= machine_to_phys_nr)) + return ~0; + /* * The array access can fail (e.g., device space beyond end of RAM). * In such cases it doesn't matter what we return (we return garbage), * but we must handle the fault without crashing! */ ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); -try_override: - /* ret might be < 0 if there are no entries in the m2p for mfn */ if (ret < 0) - pfn = ~0; - else if (get_phys_to_machine(pfn) != mfn) + return ~0; + + return pfn; +} + +static inline unsigned long mfn_to_pfn(unsigned long mfn) +{ + unsigned long pfn; + + if (xen_feature(XENFEAT_auto_translated_physmap)) + return mfn; + + pfn = mfn_to_pfn_no_overrides(mfn); + if (get_phys_to_machine(pfn) != mfn) { /* * If this appears to be a foreign mfn (because the pfn * doesn't map back to the mfn), then check the local override @@ -111,6 +119,7 @@ try_override: * m2p_find_override_pfn returns ~0 if it doesn't find anything. */ pfn = m2p_find_override_pfn(mfn, ~0); + } /* * pfn is ~0 if there are no entries in the m2p for mfn or if the diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 5d9a3033b3d7..d3a87780c70b 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -211,9 +211,9 @@ struct kvm_cpuid_entry2 { __u32 padding[3]; }; -#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 -#define KVM_CPUID_FLAG_STATEFUL_FUNC 2 -#define KVM_CPUID_FLAG_STATE_READ_NEXT 4 +#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) +#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) +#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) /* for KVM_SET_CPUID2 */ struct kvm_cpuid2 { diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index bb0465090ae5..b93e09a0fa21 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -536,6 +536,7 @@ /* MSR_IA32_VMX_MISC bits */ #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) +#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F /* AMD-V MSRs */ #define MSR_VM_CR 0xc0010114 diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 8355c84b9729..9d8449158cf9 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1506,7 +1506,7 @@ static int __init init_hw_perf_events(void) err = amd_pmu_init(); break; default: - return 0; + err = -ENOTSUPP; } if (err != 0) { pr_cont("no PMU driver, software events only.\n"); @@ -1883,26 +1883,21 @@ static struct pmu pmu = { void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) { - userpg->cap_usr_time = 0; - userpg->cap_usr_time_zero = 0; - userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc; + userpg->cap_user_time = 0; + userpg->cap_user_time_zero = 0; + userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc; userpg->pmc_width = x86_pmu.cntval_bits; - if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) - return; - - if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) + if (!sched_clock_stable) return; - userpg->cap_usr_time = 1; + userpg->cap_user_time = 1; userpg->time_mult = this_cpu_read(cyc2ns); userpg->time_shift = CYC2NS_SCALE_FACTOR; userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; - if (sched_clock_stable && !check_tsc_disabled()) { - userpg->cap_usr_time_zero = 1; - userpg->time_zero = this_cpu_read(cyc2ns_offset); - } + userpg->cap_user_time_zero = 1; + userpg->time_zero = this_cpu_read(cyc2ns_offset); } /* diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 9db76c31b3c3..f31a1655d1ff 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2325,6 +2325,7 @@ __init int intel_pmu_init(void) break; case 55: /* Atom 22nm "Silvermont" */ + case 77: /* Avoton "Silvermont" */ memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 8ed44589b0e4..4118f9f68315 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -2706,14 +2706,14 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) box->hrtimer.function = uncore_pmu_hrtimer; } -struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cpu) +static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node) { struct intel_uncore_box *box; int i, size; size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg); - box = kzalloc_node(size, GFP_KERNEL, cpu_to_node(cpu)); + box = kzalloc_node(size, GFP_KERNEL, node); if (!box) return NULL; @@ -3031,7 +3031,7 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu, struct intel_uncore_box *fake_box; int ret = -EINVAL, n; - fake_box = uncore_alloc_box(pmu->type, smp_processor_id()); + fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE); if (!fake_box) return -ENOMEM; @@ -3294,7 +3294,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id } type = pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; - box = uncore_alloc_box(type, 0); + box = uncore_alloc_box(type, NUMA_NO_NODE); if (!box) return -ENOMEM; @@ -3499,7 +3499,7 @@ static int uncore_cpu_prepare(int cpu, int phys_id) if (pmu->func_id < 0) pmu->func_id = j; - box = uncore_alloc_box(type, cpu); + box = uncore_alloc_box(type, cpu_to_node(cpu)); if (!box) return -ENOMEM; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 697b93af02dd..a0e2a8a80c94 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -775,11 +775,22 @@ void __init kvm_spinlock_init(void) if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) return; - printk(KERN_INFO "KVM setup paravirtual spinlock\n"); + pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning); + pv_lock_ops.unlock_kick = kvm_unlock_kick; +} + +static __init int kvm_spinlock_init_jump(void) +{ + if (!kvm_para_available()) + return 0; + if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) + return 0; static_key_slow_inc(¶virt_ticketlocks_enabled); + printk(KERN_INFO "KVM setup paravirtual spinlock\n"); - pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning); - pv_lock_ops.unlock_kick = kvm_unlock_kick; + return 0; } +early_initcall(kvm_spinlock_init_jump); + #endif /* CONFIG_PARAVIRT_SPINLOCKS */ diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 1570e0741344..e6041094ff26 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -139,6 +139,7 @@ bool kvm_check_and_clear_guest_paused(void) src = &hv_clock[cpu].pvti; if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) { src->flags &= ~PVCLOCK_GUEST_STOPPED; + pvclock_touch_watchdogs(); ret = true; } diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 7123b5df479d..af99f71aeb7f 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -216,6 +216,7 @@ int apply_microcode_amd(int cpu) /* need to apply patch? */ if (rev >= mc_amd->hdr.patch_id) { c->microcode = rev; + uci->cpu_sig.rev = rev; return 0; } diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index a16bae3f83b3..2f355d229a58 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -43,6 +43,14 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src) return pv_tsc_khz; } +void pvclock_touch_watchdogs(void) +{ + touch_softlockup_watchdog_sync(); + clocksource_touch_watchdog(); + rcu_cpu_stall_reset(); + reset_hung_task_detector(); +} + static atomic64_t last_value = ATOMIC64_INIT(0); void pvclock_resume(void) @@ -74,6 +82,11 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) version = __pvclock_read_cycles(src, &ret, &flags); } while ((src->version & 1) || version != src->version); + if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) { + src->flags &= ~PVCLOCK_GUEST_STOPPED; + pvclock_touch_watchdogs(); + } + if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) && (flags & PVCLOCK_TSC_STABLE_BIT)) return ret; diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 563ed91e6faa..7e920bff99a3 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -326,6 +326,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6320"), }, }, + { /* Handle problems with rebooting on the Latitude E5410. */ + .callback = set_pci_reboot, + .ident = "Dell Latitude E5410", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E5410"), + }, + }, { /* Handle problems with rebooting on the Latitude E5420. */ .callback = set_pci_reboot, .ident = "Dell Latitude E5420", @@ -352,12 +360,28 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { }, { /* Handle problems with rebooting on the Precision M6600. */ .callback = set_pci_reboot, - .ident = "Dell OptiPlex 990", + .ident = "Dell Precision M6600", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"), }, }, + { /* Handle problems with rebooting on the Dell PowerEdge C6100. */ + .callback = set_pci_reboot, + .ident = "Dell PowerEdge C6100", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "C6100"), + }, + }, + { /* Some C6100 machines were shipped with vendor being 'Dell'. */ + .callback = set_pci_reboot, + .ident = "Dell PowerEdge C6100", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell"), + DMI_MATCH(DMI_PRODUCT_NAME, "C6100"), + }, + }, { } }; diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c index 22513e96b012..86179d409893 100644 --- a/arch/x86/kernel/sysfb_simplefb.c +++ b/arch/x86/kernel/sysfb_simplefb.c @@ -72,14 +72,14 @@ __init int create_simplefb(const struct screen_info *si, * the part that is occupied by the framebuffer */ len = mode->height * mode->stride; len = PAGE_ALIGN(len); - if (len > si->lfb_size << 16) { + if (len > (u64)si->lfb_size << 16) { printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n"); return -EINVAL; } /* setup IORESOURCE_MEM as framebuffer memory */ memset(&res, 0, sizeof(res)); - res.flags = IORESOURCE_MEM; + res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; res.name = simplefb_resname; res.start = si->lfb_base; res.end = si->lfb_base + len - 1; diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index a47a3e54b964..b89c5db2b832 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -38,6 +38,7 @@ config KVM select PERF_EVENTS select HAVE_KVM_MSI select HAVE_KVM_CPU_RELAX_INTERCEPT + select KVM_VFIO ---help--- Support hosting fully virtualized guest machines using hardware virtualization extensions. You will need a fairly recent diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index bf4fb04d0112..25d22b2d6509 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -9,7 +9,7 @@ KVM := ../../../virt/kvm kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \ $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \ - $(KVM)/eventfd.o $(KVM)/irqchip.o + $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b110fe6c03d4..c6976257eff5 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -23,6 +23,26 @@ #include "mmu.h" #include "trace.h" +static u32 xstate_required_size(u64 xstate_bv) +{ + int feature_bit = 0; + u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; + + xstate_bv &= ~XSTATE_FPSSE; + while (xstate_bv) { + if (xstate_bv & 0x1) { + u32 eax, ebx, ecx, edx; + cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx); + ret = max(ret, eax + ebx); + } + + xstate_bv >>= 1; + feature_bit++; + } + + return ret; +} + void kvm_update_cpuid(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; @@ -46,6 +66,18 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu) apic->lapic_timer.timer_mode_mask = 1 << 17; } + best = kvm_find_cpuid_entry(vcpu, 0xD, 0); + if (!best) { + vcpu->arch.guest_supported_xcr0 = 0; + vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; + } else { + vcpu->arch.guest_supported_xcr0 = + (best->eax | ((u64)best->edx << 32)) & + host_xcr0 & KVM_SUPPORTED_XCR0; + vcpu->arch.guest_xstate_size = + xstate_required_size(vcpu->arch.guest_supported_xcr0); + } + kvm_pmu_cpuid_update(vcpu); } @@ -182,13 +214,35 @@ static bool supported_xcr0_bit(unsigned bit) { u64 mask = ((u64)1 << bit); - return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0; + return mask & KVM_SUPPORTED_XCR0 & host_xcr0; } #define F(x) bit(X86_FEATURE_##x) -static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, - u32 index, int *nent, int maxnent) +static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, + u32 func, u32 index, int *nent, int maxnent) +{ + switch (func) { + case 0: + entry->eax = 1; /* only one leaf currently */ + ++*nent; + break; + case 1: + entry->ecx = F(MOVBE); + ++*nent; + break; + default: + break; + } + + entry->function = func; + entry->index = index; + + return 0; +} + +static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, + u32 index, int *nent, int maxnent) { int r; unsigned f_nx = is_efer_nx() ? F(NX) : 0; @@ -383,6 +437,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, case 0xd: { int idx, i; + entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0; + entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32; entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; for (idx = 1, i = 1; idx < 64; ++idx) { if (*nent >= maxnent) @@ -481,6 +537,15 @@ out: return r; } +static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func, + u32 idx, int *nent, int maxnent, unsigned int type) +{ + if (type == KVM_GET_EMULATED_CPUID) + return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent); + + return __do_cpuid_ent(entry, func, idx, nent, maxnent); +} + #undef F struct kvm_cpuid_param { @@ -495,8 +560,36 @@ static bool is_centaur_cpu(const struct kvm_cpuid_param *param) return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR; } -int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries) +static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries, + __u32 num_entries, unsigned int ioctl_type) +{ + int i; + __u32 pad[3]; + + if (ioctl_type != KVM_GET_EMULATED_CPUID) + return false; + + /* + * We want to make sure that ->padding is being passed clean from + * userspace in case we want to use it for something in the future. + * + * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we + * have to give ourselves satisfied only with the emulated side. /me + * sheds a tear. + */ + for (i = 0; i < num_entries; i++) { + if (copy_from_user(pad, entries[i].padding, sizeof(pad))) + return true; + + if (pad[0] || pad[1] || pad[2]) + return true; + } + return false; +} + +int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries, + unsigned int type) { struct kvm_cpuid_entry2 *cpuid_entries; int limit, nent = 0, r = -E2BIG, i; @@ -513,8 +606,12 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, goto out; if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) cpuid->nent = KVM_MAX_CPUID_ENTRIES; + + if (sanity_check_entries(entries, cpuid->nent, type)) + return -EINVAL; + r = -ENOMEM; - cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); + cpuid_entries = vzalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); if (!cpuid_entries) goto out; @@ -526,7 +623,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, continue; r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx, - &nent, cpuid->nent); + &nent, cpuid->nent, type); if (r) goto out_free; @@ -537,7 +634,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, limit = cpuid_entries[nent - 1].eax; for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func) r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx, - &nent, cpuid->nent); + &nent, cpuid->nent, type); if (r) goto out_free; @@ -661,6 +758,7 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) *edx = best->edx; } else *eax = *ebx = *ecx = *edx = 0; + trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx); } EXPORT_SYMBOL_GPL(kvm_cpuid); @@ -676,6 +774,5 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) kvm_register_write(vcpu, VCPU_REGS_RCX, ecx); kvm_register_write(vcpu, VCPU_REGS_RDX, edx); kvm_x86_ops->skip_emulated_instruction(vcpu); - trace_kvm_cpuid(function, eax, ebx, ecx, edx); } EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index b7fd07984888..f1e4895174b2 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -6,8 +6,9 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, u32 function, u32 index); -int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries); +int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries, + unsigned int type); int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, struct kvm_cpuid_entry __user *entries); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index ddc3f3d2afdb..07ffca0a89e9 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -130,7 +130,7 @@ #define Mov (1<<20) /* Misc flags */ #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ -#define VendorSpecific (1<<22) /* Vendor specific instruction */ +#define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */ #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */ #define Undefined (1<<25) /* No Such Instruction */ @@ -785,9 +785,10 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, * @highbyte_regs specifies whether to decode AH,CH,DH,BH. */ static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg, - int highbyte_regs) + int byteop) { void *p; + int highbyte_regs = (ctxt->rex_prefix == 0) && byteop; if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8) p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1; @@ -1024,7 +1025,6 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt, struct operand *op) { unsigned reg = ctxt->modrm_reg; - int highbyte_regs = ctxt->rex_prefix == 0; if (!(ctxt->d & ModRM)) reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3); @@ -1045,13 +1045,9 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt, } op->type = OP_REG; - if (ctxt->d & ByteOp) { - op->addr.reg = decode_register(ctxt, reg, highbyte_regs); - op->bytes = 1; - } else { - op->addr.reg = decode_register(ctxt, reg, 0); - op->bytes = ctxt->op_bytes; - } + op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; + op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp); + fetch_register_operand(op); op->orig_val = op->val; } @@ -1082,12 +1078,10 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, ctxt->modrm_seg = VCPU_SREG_DS; if (ctxt->modrm_mod == 3) { - int highbyte_regs = ctxt->rex_prefix == 0; - op->type = OP_REG; op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; op->addr.reg = decode_register(ctxt, ctxt->modrm_rm, - highbyte_regs && (ctxt->d & ByteOp)); + ctxt->d & ByteOp); if (ctxt->d & Sse) { op->type = OP_XMM; op->bytes = 16; @@ -2961,6 +2955,46 @@ static int em_mov(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +#define FFL(x) bit(X86_FEATURE_##x) + +static int em_movbe(struct x86_emulate_ctxt *ctxt) +{ + u32 ebx, ecx, edx, eax = 1; + u16 tmp; + + /* + * Check MOVBE is set in the guest-visible CPUID leaf. + */ + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); + if (!(ecx & FFL(MOVBE))) + return emulate_ud(ctxt); + + switch (ctxt->op_bytes) { + case 2: + /* + * From MOVBE definition: "...When the operand size is 16 bits, + * the upper word of the destination register remains unchanged + * ..." + * + * Both casting ->valptr and ->val to u16 breaks strict aliasing + * rules so we have to do the operation almost per hand. + */ + tmp = (u16)ctxt->src.val; + ctxt->dst.val &= ~0xffffUL; + ctxt->dst.val |= (unsigned long)swab16(tmp); + break; + case 4: + ctxt->dst.val = swab32((u32)ctxt->src.val); + break; + case 8: + ctxt->dst.val = swab64(ctxt->src.val); + break; + default: + return X86EMUL_PROPAGATE_FAULT; + } + return X86EMUL_CONTINUE; +} + static int em_cr_write(struct x86_emulate_ctxt *ctxt) { if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val)) @@ -3256,6 +3290,18 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_sahf(struct x86_emulate_ctxt *ctxt) +{ + u32 flags; + + flags = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF; + flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8; + + ctxt->eflags &= ~0xffUL; + ctxt->eflags |= flags | X86_EFLAGS_FIXED; + return X86EMUL_CONTINUE; +} + static int em_lahf(struct x86_emulate_ctxt *ctxt) { *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL; @@ -3502,7 +3548,7 @@ static const struct opcode group7_rm1[] = { static const struct opcode group7_rm3[] = { DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa), - II(SrcNone | Prot | VendorSpecific, em_vmmcall, vmmcall), + II(SrcNone | Prot | EmulateOnUD, em_vmmcall, vmmcall), DIP(SrcNone | Prot | Priv, vmload, check_svme_pa), DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa), DIP(SrcNone | Prot | Priv, stgi, check_svme), @@ -3587,7 +3633,7 @@ static const struct group_dual group7 = { { II(SrcMem16 | Mov | Priv, em_lmsw, lmsw), II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg), }, { - I(SrcNone | Priv | VendorSpecific, em_vmcall), + I(SrcNone | Priv | EmulateOnUD, em_vmcall), EXT(0, group7_rm1), N, EXT(0, group7_rm3), II(SrcNone | DstMem | Mov, em_smsw, smsw), N, @@ -3750,7 +3796,8 @@ static const struct opcode opcode_table[256] = { D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), I(SrcImmFAddr | No64, em_call_far), N, II(ImplicitOps | Stack, em_pushf, pushf), - II(ImplicitOps | Stack, em_popf, popf), N, I(ImplicitOps, em_lahf), + II(ImplicitOps | Stack, em_popf, popf), + I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf), /* 0xA0 - 0xA7 */ I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), @@ -3810,7 +3857,7 @@ static const struct opcode opcode_table[256] = { static const struct opcode twobyte_table[256] = { /* 0x00 - 0x0F */ G(0, group6), GD(0, &group7), N, N, - N, I(ImplicitOps | VendorSpecific, em_syscall), + N, I(ImplicitOps | EmulateOnUD, em_syscall), II(ImplicitOps | Priv, em_clts, clts), N, DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, N, D(ImplicitOps | ModRM), N, N, @@ -3830,8 +3877,8 @@ static const struct opcode twobyte_table[256] = { IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), II(ImplicitOps | Priv, em_rdmsr, rdmsr), IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc), - I(ImplicitOps | VendorSpecific, em_sysenter), - I(ImplicitOps | Priv | VendorSpecific, em_sysexit), + I(ImplicitOps | EmulateOnUD, em_sysenter), + I(ImplicitOps | Priv | EmulateOnUD, em_sysexit), N, N, N, N, N, N, N, N, N, N, /* 0x40 - 0x4F */ @@ -3892,6 +3939,30 @@ static const struct opcode twobyte_table[256] = { N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N }; +static const struct gprefix three_byte_0f_38_f0 = { + I(DstReg | SrcMem | Mov, em_movbe), N, N, N +}; + +static const struct gprefix three_byte_0f_38_f1 = { + I(DstMem | SrcReg | Mov, em_movbe), N, N, N +}; + +/* + * Insns below are selected by the prefix which indexed by the third opcode + * byte. + */ +static const struct opcode opcode_map_0f_38[256] = { + /* 0x00 - 0x7f */ + X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), + /* 0x80 - 0xef */ + X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), + /* 0xf0 - 0xf1 */ + GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f0), + GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f1), + /* 0xf2 - 0xff */ + N, N, X4(N), X8(N) +}; + #undef D #undef N #undef G @@ -4040,7 +4111,8 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, case OpMem8: ctxt->memop.bytes = 1; if (ctxt->memop.type == OP_REG) { - ctxt->memop.addr.reg = decode_register(ctxt, ctxt->modrm_rm, 1); + ctxt->memop.addr.reg = decode_register(ctxt, + ctxt->modrm_rm, true); fetch_register_operand(&ctxt->memop); } goto mem_common; @@ -4126,6 +4198,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) ctxt->_eip = ctxt->eip; ctxt->fetch.start = ctxt->_eip; ctxt->fetch.end = ctxt->fetch.start + insn_len; + ctxt->opcode_len = 1; if (insn_len > 0) memcpy(ctxt->fetch.data, insn, insn_len); @@ -4208,9 +4281,16 @@ done_prefixes: opcode = opcode_table[ctxt->b]; /* Two-byte opcode? */ if (ctxt->b == 0x0f) { - ctxt->twobyte = 1; + ctxt->opcode_len = 2; ctxt->b = insn_fetch(u8, ctxt); opcode = twobyte_table[ctxt->b]; + + /* 0F_38 opcode map */ + if (ctxt->b == 0x38) { + ctxt->opcode_len = 3; + ctxt->b = insn_fetch(u8, ctxt); + opcode = opcode_map_0f_38[ctxt->b]; + } } ctxt->d = opcode.flags; @@ -4267,7 +4347,7 @@ done_prefixes: if (ctxt->d == 0 || (ctxt->d & NotImpl)) return EMULATION_FAILED; - if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn) + if (!(ctxt->d & EmulateOnUD) && ctxt->ud) return EMULATION_FAILED; if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack)) @@ -4540,8 +4620,10 @@ special_insn: goto writeback; } - if (ctxt->twobyte) + if (ctxt->opcode_len == 2) goto twobyte_insn; + else if (ctxt->opcode_len == 3) + goto threebyte_insn; switch (ctxt->b) { case 0x63: /* movsxd */ @@ -4726,6 +4808,8 @@ twobyte_insn: goto cannot_emulate; } +threebyte_insn: + if (rc != X86EMUL_CONTINUE) goto done; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index dce0df8150df..40772ef0f2b1 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2570,11 +2570,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, kvm_release_pfn_clean(pfn); } -static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) -{ - mmu_free_roots(vcpu); -} - static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log) { @@ -3424,18 +3419,11 @@ out_unlock: return 0; } -static void nonpaging_free(struct kvm_vcpu *vcpu) -{ - mmu_free_roots(vcpu); -} - -static int nonpaging_init_context(struct kvm_vcpu *vcpu, - struct kvm_mmu *context) +static void nonpaging_init_context(struct kvm_vcpu *vcpu, + struct kvm_mmu *context) { - context->new_cr3 = nonpaging_new_cr3; context->page_fault = nonpaging_page_fault; context->gva_to_gpa = nonpaging_gva_to_gpa; - context->free = nonpaging_free; context->sync_page = nonpaging_sync_page; context->invlpg = nonpaging_invlpg; context->update_pte = nonpaging_update_pte; @@ -3444,7 +3432,6 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu, context->root_hpa = INVALID_PAGE; context->direct_map = true; context->nx = false; - return 0; } void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) @@ -3454,9 +3441,8 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb); -static void paging_new_cr3(struct kvm_vcpu *vcpu) +void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu) { - pgprintk("%s: cr3 %lx\n", __func__, kvm_read_cr3(vcpu)); mmu_free_roots(vcpu); } @@ -3471,11 +3457,6 @@ static void inject_page_fault(struct kvm_vcpu *vcpu, vcpu->arch.mmu.inject_page_fault(vcpu, fault); } -static void paging_free(struct kvm_vcpu *vcpu) -{ - nonpaging_free(vcpu); -} - static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn, unsigned access, int *nr_present) { @@ -3665,9 +3646,9 @@ static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) mmu->last_pte_bitmap = map; } -static int paging64_init_context_common(struct kvm_vcpu *vcpu, - struct kvm_mmu *context, - int level) +static void paging64_init_context_common(struct kvm_vcpu *vcpu, + struct kvm_mmu *context, + int level) { context->nx = is_nx(vcpu); context->root_level = level; @@ -3677,27 +3658,24 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, update_last_pte_bitmap(vcpu, context); ASSERT(is_pae(vcpu)); - context->new_cr3 = paging_new_cr3; context->page_fault = paging64_page_fault; context->gva_to_gpa = paging64_gva_to_gpa; context->sync_page = paging64_sync_page; context->invlpg = paging64_invlpg; context->update_pte = paging64_update_pte; - context->free = paging_free; context->shadow_root_level = level; context->root_hpa = INVALID_PAGE; context->direct_map = false; - return 0; } -static int paging64_init_context(struct kvm_vcpu *vcpu, - struct kvm_mmu *context) +static void paging64_init_context(struct kvm_vcpu *vcpu, + struct kvm_mmu *context) { - return paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL); + paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL); } -static int paging32_init_context(struct kvm_vcpu *vcpu, - struct kvm_mmu *context) +static void paging32_init_context(struct kvm_vcpu *vcpu, + struct kvm_mmu *context) { context->nx = false; context->root_level = PT32_ROOT_LEVEL; @@ -3706,33 +3684,28 @@ static int paging32_init_context(struct kvm_vcpu *vcpu, update_permission_bitmask(vcpu, context, false); update_last_pte_bitmap(vcpu, context); - context->new_cr3 = paging_new_cr3; context->page_fault = paging32_page_fault; context->gva_to_gpa = paging32_gva_to_gpa; - context->free = paging_free; context->sync_page = paging32_sync_page; context->invlpg = paging32_invlpg; context->update_pte = paging32_update_pte; context->shadow_root_level = PT32E_ROOT_LEVEL; context->root_hpa = INVALID_PAGE; context->direct_map = false; - return 0; } -static int paging32E_init_context(struct kvm_vcpu *vcpu, - struct kvm_mmu *context) +static void paging32E_init_context(struct kvm_vcpu *vcpu, + struct kvm_mmu *context) { - return paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL); + paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL); } -static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) +static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) { struct kvm_mmu *context = vcpu->arch.walk_mmu; context->base_role.word = 0; - context->new_cr3 = nonpaging_new_cr3; context->page_fault = tdp_page_fault; - context->free = nonpaging_free; context->sync_page = nonpaging_sync_page; context->invlpg = nonpaging_invlpg; context->update_pte = nonpaging_update_pte; @@ -3767,37 +3740,32 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) update_permission_bitmask(vcpu, context, false); update_last_pte_bitmap(vcpu, context); - - return 0; } -int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) +void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { - int r; bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); ASSERT(vcpu); ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); if (!is_paging(vcpu)) - r = nonpaging_init_context(vcpu, context); + nonpaging_init_context(vcpu, context); else if (is_long_mode(vcpu)) - r = paging64_init_context(vcpu, context); + paging64_init_context(vcpu, context); else if (is_pae(vcpu)) - r = paging32E_init_context(vcpu, context); + paging32E_init_context(vcpu, context); else - r = paging32_init_context(vcpu, context); + paging32_init_context(vcpu, context); vcpu->arch.mmu.base_role.nxe = is_nx(vcpu); vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); vcpu->arch.mmu.base_role.smep_andnot_wp = smep && !is_write_protection(vcpu); - - return r; } EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); -int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, +void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, bool execonly) { ASSERT(vcpu); @@ -3806,37 +3774,30 @@ int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, context->shadow_root_level = kvm_x86_ops->get_tdp_level(); context->nx = true; - context->new_cr3 = paging_new_cr3; context->page_fault = ept_page_fault; context->gva_to_gpa = ept_gva_to_gpa; context->sync_page = ept_sync_page; context->invlpg = ept_invlpg; context->update_pte = ept_update_pte; - context->free = paging_free; context->root_level = context->shadow_root_level; context->root_hpa = INVALID_PAGE; context->direct_map = false; update_permission_bitmask(vcpu, context, true); reset_rsvds_bits_mask_ept(vcpu, context, execonly); - - return 0; } EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu); -static int init_kvm_softmmu(struct kvm_vcpu *vcpu) +static void init_kvm_softmmu(struct kvm_vcpu *vcpu) { - int r = kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu); - + kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu); vcpu->arch.walk_mmu->set_cr3 = kvm_x86_ops->set_cr3; vcpu->arch.walk_mmu->get_cr3 = get_cr3; vcpu->arch.walk_mmu->get_pdptr = kvm_pdptr_read; vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; - - return r; } -static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) +static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) { struct kvm_mmu *g_context = &vcpu->arch.nested_mmu; @@ -3873,11 +3834,9 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) update_permission_bitmask(vcpu, g_context, false); update_last_pte_bitmap(vcpu, g_context); - - return 0; } -static int init_kvm_mmu(struct kvm_vcpu *vcpu) +static void init_kvm_mmu(struct kvm_vcpu *vcpu) { if (mmu_is_nested(vcpu)) return init_kvm_nested_mmu(vcpu); @@ -3887,18 +3846,12 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu) return init_kvm_softmmu(vcpu); } -static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) +void kvm_mmu_reset_context(struct kvm_vcpu *vcpu) { ASSERT(vcpu); - if (VALID_PAGE(vcpu->arch.mmu.root_hpa)) - /* mmu.free() should set root_hpa = INVALID_PAGE */ - vcpu->arch.mmu.free(vcpu); -} -int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) -{ - destroy_kvm_mmu(vcpu); - return init_kvm_mmu(vcpu); + kvm_mmu_unload(vcpu); + init_kvm_mmu(vcpu); } EXPORT_SYMBOL_GPL(kvm_mmu_reset_context); @@ -3923,6 +3876,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load); void kvm_mmu_unload(struct kvm_vcpu *vcpu) { mmu_free_roots(vcpu); + WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa)); } EXPORT_SYMBOL_GPL(kvm_mmu_unload); @@ -4281,12 +4235,12 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu) return alloc_mmu_pages(vcpu); } -int kvm_mmu_setup(struct kvm_vcpu *vcpu) +void kvm_mmu_setup(struct kvm_vcpu *vcpu) { ASSERT(vcpu); ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); - return init_kvm_mmu(vcpu); + init_kvm_mmu(vcpu); } void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) @@ -4428,7 +4382,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) int nr_to_scan = sc->nr_to_scan; unsigned long freed = 0; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { int idx; @@ -4478,9 +4432,8 @@ unlock: break; } - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); return freed; - } static unsigned long @@ -4574,7 +4527,7 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu) { ASSERT(vcpu); - destroy_kvm_mmu(vcpu); + kvm_mmu_unload(vcpu); free_mmu_pages(vcpu); mmu_free_memory_caches(vcpu); } diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 77e044a0f5f7..292615274358 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -70,8 +70,8 @@ enum { }; int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); -int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); -int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, +void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); +void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, bool execonly); static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c0bc80391e40..c7168a5cff1b 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1959,11 +1959,9 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, nested_svm_vmexit(svm); } -static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) +static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) { - int r; - - r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu); + kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu); vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; @@ -1971,8 +1969,6 @@ static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; vcpu->arch.mmu.shadow_root_level = get_npt_level(); vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; - - return r; } static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a1216de9ffda..e293a62a11d6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1498,7 +1498,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, break; if (i == NR_AUTOLOAD_MSRS) { - printk_once(KERN_WARNING"Not enough mst switch entries. " + printk_once(KERN_WARNING "Not enough msr switch entries. " "Can't add msr %x\n", msr); return; } else if (i == m->nr) { @@ -1898,16 +1898,12 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) /* * KVM wants to inject page-faults which it got to the guest. This function * checks whether in a nested guest, we need to inject them to L1 or L2. - * This function assumes it is called with the exit reason in vmcs02 being - * a #PF exception (this is the only case in which KVM injects a #PF when L2 - * is running). */ -static int nested_pf_handled(struct kvm_vcpu *vcpu) +static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ - if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR))) + if (!(vmcs12->exception_bitmap & (1u << nr))) return 0; nested_vmx_vmexit(vcpu); @@ -1921,8 +1917,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, struct vcpu_vmx *vmx = to_vmx(vcpu); u32 intr_info = nr | INTR_INFO_VALID_MASK; - if (nr == PF_VECTOR && is_guest_mode(vcpu) && - !vmx->nested.nested_run_pending && nested_pf_handled(vcpu)) + if (!reinject && is_guest_mode(vcpu) && + nested_vmx_check_exception(vcpu, nr)) return; if (has_error_code) { @@ -2204,9 +2200,15 @@ static __init void nested_vmx_setup_ctls_msrs(void) #ifdef CONFIG_X86_64 VM_EXIT_HOST_ADDR_SPACE_SIZE | #endif - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; + if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) || + !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) { + nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; + nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER; + } nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | - VM_EXIT_LOAD_IA32_EFER); + VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER); /* entry controls */ rdmsr(MSR_IA32_VMX_ENTRY_CTLS, @@ -2226,7 +2228,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high); nested_vmx_procbased_ctls_low = 0; nested_vmx_procbased_ctls_high &= - CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_USE_TSC_OFFSETING | + CPU_BASED_VIRTUAL_INTR_PENDING | + CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING | CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING | CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING | @@ -2252,13 +2255,15 @@ static __init void nested_vmx_setup_ctls_msrs(void) nested_vmx_secondary_ctls_low = 0; nested_vmx_secondary_ctls_high &= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_UNRESTRICTED_GUEST | SECONDARY_EXEC_WBINVD_EXITING; if (enable_ept) { /* nested EPT: emulate EPT also to L1 */ nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT; nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | - VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT; + VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT | + VMX_EPT_INVEPT_BIT; nested_vmx_ept_caps &= vmx_capability.ept; /* * Since invept is completely emulated we support both global @@ -3255,25 +3260,29 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) static void ept_load_pdptrs(struct kvm_vcpu *vcpu) { + struct kvm_mmu *mmu = vcpu->arch.walk_mmu; + if (!test_bit(VCPU_EXREG_PDPTR, (unsigned long *)&vcpu->arch.regs_dirty)) return; if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { - vmcs_write64(GUEST_PDPTR0, vcpu->arch.mmu.pdptrs[0]); - vmcs_write64(GUEST_PDPTR1, vcpu->arch.mmu.pdptrs[1]); - vmcs_write64(GUEST_PDPTR2, vcpu->arch.mmu.pdptrs[2]); - vmcs_write64(GUEST_PDPTR3, vcpu->arch.mmu.pdptrs[3]); + vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]); + vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]); + vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]); + vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]); } } static void ept_save_pdptrs(struct kvm_vcpu *vcpu) { + struct kvm_mmu *mmu = vcpu->arch.walk_mmu; + if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { - vcpu->arch.mmu.pdptrs[0] = vmcs_read64(GUEST_PDPTR0); - vcpu->arch.mmu.pdptrs[1] = vmcs_read64(GUEST_PDPTR1); - vcpu->arch.mmu.pdptrs[2] = vmcs_read64(GUEST_PDPTR2); - vcpu->arch.mmu.pdptrs[3] = vmcs_read64(GUEST_PDPTR3); + mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0); + mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1); + mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2); + mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3); } __set_bit(VCPU_EXREG_PDPTR, @@ -3376,8 +3385,10 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) if (enable_ept) { eptp = construct_eptp(cr3); vmcs_write64(EPT_POINTER, eptp); - guest_cr3 = is_paging(vcpu) ? kvm_read_cr3(vcpu) : - vcpu->kvm->arch.ept_identity_map_addr; + if (is_paging(vcpu) || is_guest_mode(vcpu)) + guest_cr3 = kvm_read_cr3(vcpu); + else + guest_cr3 = vcpu->kvm->arch.ept_identity_map_addr; ept_load_pdptrs(vcpu); } @@ -4875,6 +4886,17 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) hypercall[2] = 0xc1; } +static bool nested_cr0_valid(struct vmcs12 *vmcs12, unsigned long val) +{ + unsigned long always_on = VMXON_CR0_ALWAYSON; + + if (nested_vmx_secondary_ctls_high & + SECONDARY_EXEC_UNRESTRICTED_GUEST && + nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST)) + always_on &= ~(X86_CR0_PE | X86_CR0_PG); + return (val & always_on) == always_on; +} + /* called to set cr0 as appropriate for a mov-to-cr0 exit. */ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) { @@ -4893,9 +4915,7 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) val = (val & ~vmcs12->cr0_guest_host_mask) | (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask); - /* TODO: will have to take unrestricted guest mode into - * account */ - if ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) + if (!nested_cr0_valid(vmcs12, val)) return 1; if (kvm_set_cr0(vcpu, val)) @@ -5345,7 +5365,9 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) * There are errata that may cause this bit to not be set: * AAK134, BY25. */ - if (exit_qualification & INTR_INFO_UNBLOCK_NMI) + if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && + cpu_has_virtual_nmis() && + (exit_qualification & INTR_INFO_UNBLOCK_NMI)) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); @@ -6716,6 +6738,27 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) *info2 = vmcs_read32(VM_EXIT_INTR_INFO); } +static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu) +{ + u64 delta_tsc_l1; + u32 preempt_val_l1, preempt_val_l2, preempt_scale; + + if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control & + PIN_BASED_VMX_PREEMPTION_TIMER)) + return; + preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) & + MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE; + preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); + delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc()) + - vcpu->arch.last_guest_tsc; + preempt_val_l1 = delta_tsc_l1 >> preempt_scale; + if (preempt_val_l2 <= preempt_val_l1) + preempt_val_l2 = 0; + else + preempt_val_l2 -= preempt_val_l1; + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2); +} + /* * The guest has exited. See if we can fix it or if we need userspace * assistance. @@ -6730,20 +6773,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) if (vmx->emulation_required) return handle_invalid_guest_state(vcpu); - /* - * the KVM_REQ_EVENT optimization bit is only on for one entry, and if - * we did not inject a still-pending event to L1 now because of - * nested_run_pending, we need to re-enable this bit. - */ - if (vmx->nested.nested_run_pending) - kvm_make_request(KVM_REQ_EVENT, vcpu); - - if (!is_guest_mode(vcpu) && (exit_reason == EXIT_REASON_VMLAUNCH || - exit_reason == EXIT_REASON_VMRESUME)) - vmx->nested.nested_run_pending = 1; - else - vmx->nested.nested_run_pending = 0; - if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { nested_vmx_vmexit(vcpu); return 1; @@ -7055,9 +7084,9 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, case INTR_TYPE_HARD_EXCEPTION: if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { u32 err = vmcs_read32(error_code_field); - kvm_queue_exception_e(vcpu, vector, err); + kvm_requeue_exception_e(vcpu, vector, err); } else - kvm_queue_exception(vcpu, vector); + kvm_requeue_exception(vcpu, vector); break; case INTR_TYPE_SOFT_INTR: vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); @@ -7140,6 +7169,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) atomic_switch_perf_msrs(vmx); debugctlmsr = get_debugctlmsr(); + if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) + nested_adjust_preemption_timer(vcpu); vmx->__launched = vmx->loaded_vmcs->launched; asm( /* Store host registers */ @@ -7278,6 +7309,16 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX); + /* + * the KVM_REQ_EVENT optimization bit is only on for one entry, and if + * we did not inject a still-pending event to L1 now because of + * nested_run_pending, we need to re-enable this bit. + */ + if (vmx->nested.nested_run_pending) + kvm_make_request(KVM_REQ_EVENT, vcpu); + + vmx->nested.nested_run_pending = 0; + vmx_complete_atomic_exit(vmx); vmx_recover_nmi_blocking(vmx); vmx_complete_interrupts(vmx); @@ -7404,8 +7445,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) */ if (is_mmio) ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT; - else if (vcpu->kvm->arch.iommu_domain && - !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)) + else if (kvm_arch_has_noncoherent_dma(vcpu->kvm)) ret = kvm_get_guest_memory_type(vcpu, gfn) << VMX_EPT_MT_EPTE_SHIFT; else @@ -7495,9 +7535,9 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) return get_vmcs12(vcpu)->ept_pointer; } -static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) +static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) { - int r = kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu, + kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu, nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT); vcpu->arch.mmu.set_cr3 = vmx_set_cr3; @@ -7505,8 +7545,6 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; - - return r; } static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) @@ -7514,6 +7552,20 @@ static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) vcpu->arch.walk_mmu = &vcpu->arch.mmu; } +static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, + struct x86_exception *fault) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + + WARN_ON(!is_guest_mode(vcpu)); + + /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ + if (vmcs12->exception_bitmap & (1u << PF_VECTOR)) + nested_vmx_vmexit(vcpu); + else + kvm_inject_page_fault(vcpu, fault); +} + /* * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it @@ -7527,6 +7579,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { struct vcpu_vmx *vmx = to_vmx(vcpu); u32 exec_control; + u32 exit_control; vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); @@ -7700,7 +7753,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER * bits are further modified by vmx_set_efer() below. */ - vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); + exit_control = vmcs_config.vmexit_ctrl; + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) + exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; + vmcs_write32(VM_EXIT_CONTROLS, exit_control); /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are * emulated by vmx_set_efer(), below. @@ -7767,6 +7823,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) kvm_set_cr3(vcpu, vmcs12->guest_cr3); kvm_mmu_reset_context(vcpu); + if (!enable_ept) + vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; + /* * L1 may access the L2's PDPTR, so save them to construct vmcs12 */ @@ -7775,10 +7834,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); - __clear_bit(VCPU_EXREG_PDPTR, - (unsigned long *)&vcpu->arch.regs_avail); - __clear_bit(VCPU_EXREG_PDPTR, - (unsigned long *)&vcpu->arch.regs_dirty); } kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp); @@ -7874,7 +7929,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) return 1; } - if (((vmcs12->guest_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) || + if (!nested_cr0_valid(vmcs12, vmcs12->guest_cr0) || ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) { nested_vmx_entry_failure(vcpu, vmcs12, EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); @@ -7936,6 +7991,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) enter_guest_mode(vcpu); + vmx->nested.nested_run_pending = 1; + vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); cpu = get_cpu(); @@ -8003,7 +8060,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, u32 idt_vectoring; unsigned int nr; - if (vcpu->arch.exception.pending) { + if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) { nr = vcpu->arch.exception.nr; idt_vectoring = nr | VECTORING_INFO_VALID_MASK; @@ -8021,7 +8078,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, } vmcs12->idt_vectoring_info_field = idt_vectoring; - } else if (vcpu->arch.nmi_pending) { + } else if (vcpu->arch.nmi_injected) { vmcs12->idt_vectoring_info_field = INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR; } else if (vcpu->arch.interrupt.pending) { @@ -8103,6 +8160,11 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_pending_dbg_exceptions = vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); + if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) && + (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) + vmcs12->vmx_preemption_timer_value = + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); + /* * In some cases (usually, nested EPT), L2 is allowed to change its * own CR3 without exiting. If it has changed it, we must keep it. @@ -8128,6 +8190,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); + if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER) + vmcs12->guest_ia32_efer = vcpu->arch.efer; vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); @@ -8199,7 +8263,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, * fpu_active (which may have changed). * Note that vmx_set_cr0 refers to efer set above. */ - kvm_set_cr0(vcpu, vmcs12->host_cr0); + vmx_set_cr0(vcpu, vmcs12->host_cr0); /* * If we did fpu_activate()/fpu_deactivate() during L2's run, we need * to apply the same changes to L1's vmcs. We just set cr0 correctly, @@ -8222,6 +8286,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, kvm_set_cr3(vcpu, vmcs12->host_cr3); kvm_mmu_reset_context(vcpu); + if (!enable_ept) + vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; + if (enable_vpid) { /* * Trivially support vpid by letting L2s share their parent diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e5ca72a5cdb6..21ef1ba184ae 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -577,6 +577,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) { u64 xcr0; + u64 valid_bits; /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */ if (index != XCR_XFEATURE_ENABLED_MASK) @@ -586,8 +587,16 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) return 1; if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) return 1; - if (xcr0 & ~host_xcr0) + + /* + * Do not allow the guest to set bits that we do not support + * saving. However, xcr0 bit 0 is always set, even if the + * emulated CPU does not support XSAVE (see fx_init). + */ + valid_bits = vcpu->arch.guest_supported_xcr0 | XSTATE_FP; + if (xcr0 & ~valid_bits) return 1; + kvm_put_guest_xcr0(vcpu); vcpu->arch.xcr0 = xcr0; return 0; @@ -684,7 +693,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) vcpu->arch.cr3 = cr3; __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); - vcpu->arch.mmu.new_cr3(vcpu); + kvm_mmu_new_cr3(vcpu); return 0; } EXPORT_SYMBOL_GPL(kvm_set_cr3); @@ -2564,6 +2573,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: case KVM_CAP_SET_TSS_ADDR: case KVM_CAP_EXT_CPUID: + case KVM_CAP_EXT_EMUL_CPUID: case KVM_CAP_CLOCKSOURCE: case KVM_CAP_PIT: case KVM_CAP_NOP_IO_DELAY: @@ -2673,15 +2683,17 @@ long kvm_arch_dev_ioctl(struct file *filp, r = 0; break; } - case KVM_GET_SUPPORTED_CPUID: { + case KVM_GET_SUPPORTED_CPUID: + case KVM_GET_EMULATED_CPUID: { struct kvm_cpuid2 __user *cpuid_arg = argp; struct kvm_cpuid2 cpuid; r = -EFAULT; if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) goto out; - r = kvm_dev_ioctl_get_supported_cpuid(&cpuid, - cpuid_arg->entries); + + r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries, + ioctl); if (r) goto out; @@ -2715,8 +2727,7 @@ static void wbinvd_ipi(void *garbage) static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) { - return vcpu->kvm->arch.iommu_domain && - !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY); + return kvm_arch_has_noncoherent_dma(vcpu->kvm); } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -2984,11 +2995,13 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { - if (cpu_has_xsave) + if (cpu_has_xsave) { memcpy(guest_xsave->region, &vcpu->arch.guest_fpu.state->xsave, - xstate_size); - else { + vcpu->arch.guest_xstate_size); + *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &= + vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE; + } else { memcpy(guest_xsave->region, &vcpu->arch.guest_fpu.state->fxsave, sizeof(struct i387_fxsave_struct)); @@ -3003,10 +3016,19 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, u64 xstate_bv = *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; - if (cpu_has_xsave) + if (cpu_has_xsave) { + /* + * Here we allow setting states that are not present in + * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility + * with old userspace. + */ + if (xstate_bv & ~KVM_SUPPORTED_XCR0) + return -EINVAL; + if (xstate_bv & ~host_xcr0) + return -EINVAL; memcpy(&vcpu->arch.guest_fpu.state->xsave, - guest_xsave->region, xstate_size); - else { + guest_xsave->region, vcpu->arch.guest_xstate_size); + } else { if (xstate_bv & ~XSTATE_FPSSE) return -EINVAL; memcpy(&vcpu->arch.guest_fpu.state->fxsave, @@ -3042,9 +3064,9 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, for (i = 0; i < guest_xcrs->nr_xcrs; i++) /* Only support XCR0 currently */ - if (guest_xcrs->xcrs[0].xcr == XCR_XFEATURE_ENABLED_MASK) { + if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) { r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK, - guest_xcrs->xcrs[0].value); + guest_xcrs->xcrs[i].value); break; } if (r) @@ -4775,8 +4797,8 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu) static void init_decode_cache(struct x86_emulate_ctxt *ctxt) { - memset(&ctxt->twobyte, 0, - (void *)&ctxt->_regs - (void *)&ctxt->twobyte); + memset(&ctxt->opcode_len, 0, + (void *)&ctxt->_regs - (void *)&ctxt->opcode_len); ctxt->fetch.start = 0; ctxt->fetch.end = 0; @@ -5094,8 +5116,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, ctxt->have_exception = false; ctxt->perm_ok = false; - ctxt->only_vendor_specific_insn - = emulation_type & EMULTYPE_TRAP_UD; + ctxt->ud = emulation_type & EMULTYPE_TRAP_UD; r = x86_decode_insn(ctxt, insn, insn_len); @@ -5263,7 +5284,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu->cpu != freq->cpu) @@ -5273,7 +5294,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va send_ipi = 1; } } - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); if (freq->old < freq->new && send_ipi) { /* @@ -5426,12 +5447,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work) struct kvm_vcpu *vcpu; int i; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_for_each_vcpu(i, vcpu, kvm) set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests); atomic_set(&kvm_guest_has_master_clock, 0); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); } static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); @@ -5945,10 +5966,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->mode = IN_GUEST_MODE; + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + /* We should set ->mode before check ->requests, * see the comment in make_all_cpus_request. */ - smp_mb(); + smp_mb__after_srcu_read_unlock(); local_irq_disable(); @@ -5958,12 +5981,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) smp_wmb(); local_irq_enable(); preempt_enable(); + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); r = 1; goto cancel_injection; } - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); - if (req_immediate_exit) smp_send_reschedule(vcpu->cpu); @@ -6688,7 +6710,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) if (r) return r; kvm_vcpu_reset(vcpu); - r = kvm_mmu_setup(vcpu); + kvm_mmu_setup(vcpu); vcpu_put(vcpu); return r; @@ -6940,6 +6962,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->arch.ia32_tsc_adjust_msr = 0x0; vcpu->arch.pv_time_enabled = false; + + vcpu->arch.guest_supported_xcr0 = 0; + vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; + kvm_async_pf_hash_reset(vcpu); kvm_pmu_init(vcpu); @@ -6981,6 +7007,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); + atomic_set(&kvm->arch.noncoherent_dma_count, 0); /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); @@ -7065,7 +7092,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { int i; @@ -7086,7 +7113,8 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free, } } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { int i; @@ -7283,7 +7311,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) int r; if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) || - is_error_page(work->page)) + work->wakeup_all) return; r = kvm_mmu_reload(vcpu); @@ -7393,7 +7421,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, struct x86_exception fault; trace_kvm_async_pf_ready(work->arch.token, work->gva); - if (is_error_page(work->page)) + if (work->wakeup_all) work->arch.token = ~0; /* broadcast wakeup */ else kvm_del_async_pf_gfn(vcpu, work->arch.gfn); @@ -7420,6 +7448,24 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) kvm_x86_ops->interrupt_allowed(vcpu); } +void kvm_arch_register_noncoherent_dma(struct kvm *kvm) +{ + atomic_inc(&kvm->arch.noncoherent_dma_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma); + +void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm) +{ + atomic_dec(&kvm->arch.noncoherent_dma_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma); + +bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) +{ + return atomic_read(&kvm->arch.noncoherent_dma_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); + EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index e224f7a671b6..587fb9ede436 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -122,6 +122,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception); +#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) extern u64 host_xcr0; extern struct static_key kvm_no_apic_vcpu; diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 5596c7bdd327..082e88129712 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -700,7 +700,7 @@ int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end, if (!(pci_probe & PCI_PROBE_MMCONF) || pci_mmcfg_arch_init_failed) return -ENODEV; - if (start > end || !addr) + if (start > end) return -EINVAL; mutex_lock(&pci_mmcfg_lock); @@ -716,6 +716,11 @@ int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end, return -EEXIST; } + if (!addr) { + mutex_unlock(&pci_mmcfg_lock); + return -EINVAL; + } + rc = -EBUSY; cfg = pci_mmconfig_alloc(seg, start, end, addr); if (cfg == NULL) { diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 90f6ed127096..c7e22ab29a5a 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -912,10 +912,13 @@ void __init efi_enter_virtual_mode(void) for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { md = p; - if (!(md->attribute & EFI_MEMORY_RUNTIME) && - md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) - continue; + if (!(md->attribute & EFI_MEMORY_RUNTIME)) { +#ifdef CONFIG_X86_64 + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) +#endif + continue; + } size = md->num_pages << EFI_PAGE_SHIFT; end = md->phys_addr + size; diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 8b901e8d782d..a61c7d5811be 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -879,7 +879,6 @@ int m2p_add_override(unsigned long mfn, struct page *page, unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; - int ret = 0; pfn = page_to_pfn(page); if (!PageHighMem(page)) { @@ -926,8 +925,8 @@ int m2p_add_override(unsigned long mfn, struct page *page, * frontend pages while they are being shared with the backend, * because mfn_to_pfn (that ends up being called by GUPF) will * return the backend pfn rather than the frontend pfn. */ - ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); - if (ret == 0 && get_phys_to_machine(pfn) == mfn) + pfn = mfn_to_pfn_no_overrides(mfn); + if (get_phys_to_machine(pfn) == mfn) set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); return 0; @@ -942,7 +941,6 @@ int m2p_remove_override(struct page *page, unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; - int ret = 0; pfn = page_to_pfn(page); mfn = get_phys_to_machine(pfn); @@ -1029,8 +1027,8 @@ int m2p_remove_override(struct page *page, * the original pfn causes mfn_to_pfn(mfn) to return the frontend * pfn again. */ mfn &= ~FOREIGN_FRAME_BIT; - ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); - if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) && + pfn = mfn_to_pfn_no_overrides(mfn); + if (get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) && m2p_find_override(mfn) == NULL) set_phys_to_machine(pfn, mfn); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index d1e4777b4e75..31d04758b76f 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -278,6 +278,15 @@ static void __init xen_smp_prepare_boot_cpu(void) old memory can be recycled */ make_lowmem_page_readwrite(xen_initial_gdt); +#ifdef CONFIG_X86_32 + /* + * Xen starts us with XEN_FLAT_RING1_DS, but linux code + * expects __USER_DS + */ + loadsegment(ds, __USER_DS); + loadsegment(es, __USER_DS); +#endif + xen_filter_cpu_maps(); xen_setup_vcpu_info_placement(); } diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 253f63fceea1..be6b86078957 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -259,6 +259,14 @@ void xen_uninit_lock_cpu(int cpu) } +/* + * Our init of PV spinlocks is split in two init functions due to us + * using paravirt patching and jump labels patching and having to do + * all of this before SMP code is invoked. + * + * The paravirt patching needs to be done _before_ the alternative asm code + * is started, otherwise we would not patch the core kernel code. + */ void __init xen_init_spinlocks(void) { @@ -267,12 +275,26 @@ void __init xen_init_spinlocks(void) return; } - static_key_slow_inc(¶virt_ticketlocks_enabled); - pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning); pv_lock_ops.unlock_kick = xen_unlock_kick; } +/* + * While the jump_label init code needs to happend _after_ the jump labels are + * enabled and before SMP is started. Hence we use pre-SMP initcall level + * init. We cannot do it in xen_init_spinlocks as that is done before + * jump labels are activated. + */ +static __init int xen_init_spinlocks_jump(void) +{ + if (!xen_pvspin) + return 0; + + static_key_slow_inc(¶virt_ticketlocks_enabled); + return 0; +} +early_initcall(xen_init_spinlocks_jump); + static __init int xen_parse_nopvspin(char *arg) { xen_pvspin = false; diff --git a/block/Kconfig b/block/Kconfig index 7f38e40fee08..2429515c05c2 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -99,11 +99,16 @@ config BLK_DEV_THROTTLING See Documentation/cgroups/blkio-controller.txt for more information. -config CMDLINE_PARSER +config BLK_CMDLINE_PARSER bool "Block device command line partition parser" default n ---help--- - Parsing command line, get the partitions information. + Enabling this option allows you to specify the partition layout from + the kernel boot args. This is typically of use for embedded devices + which don't otherwise have any standardized method for listing the + partitions on a block device. + + See Documentation/block/cmdline-partition.txt for more information. menu "Partition Types" diff --git a/block/Makefile b/block/Makefile index 4fa4be544ece..671a83d063a5 100644 --- a/block/Makefile +++ b/block/Makefile @@ -18,4 +18,4 @@ obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o -obj-$(CONFIG_CMDLINE_PARSER) += cmdline-parser.o +obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o diff --git a/block/partitions/Kconfig b/block/partitions/Kconfig index 87a32086535d..9b29a996c311 100644 --- a/block/partitions/Kconfig +++ b/block/partitions/Kconfig @@ -263,7 +263,7 @@ config SYSV68_PARTITION config CMDLINE_PARTITION bool "Command line partition support" if PARTITION_ADVANCED - select CMDLINE_PARSER + select BLK_CMDLINE_PARSER help - Say Y here if you would read the partitions table from bootargs. + Say Y here if you want to read the partition table from bootargs. The format for the command line is just like mtdparts. diff --git a/block/partitions/cmdline.c b/block/partitions/cmdline.c index 56cf4ffad51e..5141b563adf1 100644 --- a/block/partitions/cmdline.c +++ b/block/partitions/cmdline.c @@ -2,15 +2,15 @@ * Copyright (C) 2013 HUAWEI * Author: Cai Zhiyong <caizhiyong@huawei.com> * - * Read block device partition table from command line. - * The partition used for fixed block device (eMMC) embedded device. - * It is no MBR, save storage space. Bootloader can be easily accessed + * Read block device partition table from the command line. + * Typically used for fixed block (eMMC) embedded devices. + * It has no MBR, so saves storage space. Bootloader can be easily accessed * by absolute address of data on the block device. * Users can easily change the partition. * * The format for the command line is just like mtdparts. * - * Verbose config please reference "Documentation/block/cmdline-partition.txt" + * For further information, see "Documentation/block/cmdline-partition.txt" * */ diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 22327e6a7236..5ea5c32609ac 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -24,7 +24,7 @@ menuconfig ACPI are configured, ACPI is used. The project home page for the Linux ACPI subsystem is here: - <http://www.lesswatts.org/projects/acpi/> + <https://01.org/linux-acpi> Linux support for ACPI is based on Intel Corporation's ACPI Component Architecture (ACPI CA). For more information on the diff --git a/drivers/acpi/acpi_ipmi.c b/drivers/acpi/acpi_ipmi.c index f40acef80269..a6977e12d574 100644 --- a/drivers/acpi/acpi_ipmi.c +++ b/drivers/acpi/acpi_ipmi.c @@ -39,6 +39,7 @@ #include <linux/ipmi.h> #include <linux/device.h> #include <linux/pnp.h> +#include <linux/spinlock.h> MODULE_AUTHOR("Zhao Yakui"); MODULE_DESCRIPTION("ACPI IPMI Opregion driver"); @@ -57,7 +58,7 @@ struct acpi_ipmi_device { struct list_head head; /* the IPMI request message list */ struct list_head tx_msg_list; - struct mutex tx_msg_lock; + spinlock_t tx_msg_lock; acpi_handle handle; struct pnp_dev *pnp_dev; ipmi_user_t user_interface; @@ -147,6 +148,7 @@ static void acpi_format_ipmi_msg(struct acpi_ipmi_msg *tx_msg, struct kernel_ipmi_msg *msg; struct acpi_ipmi_buffer *buffer; struct acpi_ipmi_device *device; + unsigned long flags; msg = &tx_msg->tx_message; /* @@ -177,10 +179,10 @@ static void acpi_format_ipmi_msg(struct acpi_ipmi_msg *tx_msg, /* Get the msgid */ device = tx_msg->device; - mutex_lock(&device->tx_msg_lock); + spin_lock_irqsave(&device->tx_msg_lock, flags); device->curr_msgid++; tx_msg->tx_msgid = device->curr_msgid; - mutex_unlock(&device->tx_msg_lock); + spin_unlock_irqrestore(&device->tx_msg_lock, flags); } static void acpi_format_ipmi_response(struct acpi_ipmi_msg *msg, @@ -242,6 +244,7 @@ static void ipmi_msg_handler(struct ipmi_recv_msg *msg, void *user_msg_data) int msg_found = 0; struct acpi_ipmi_msg *tx_msg; struct pnp_dev *pnp_dev = ipmi_device->pnp_dev; + unsigned long flags; if (msg->user != ipmi_device->user_interface) { dev_warn(&pnp_dev->dev, "Unexpected response is returned. " @@ -250,7 +253,7 @@ static void ipmi_msg_handler(struct ipmi_recv_msg *msg, void *user_msg_data) ipmi_free_recv_msg(msg); return; } - mutex_lock(&ipmi_device->tx_msg_lock); + spin_lock_irqsave(&ipmi_device->tx_msg_lock, flags); list_for_each_entry(tx_msg, &ipmi_device->tx_msg_list, head) { if (msg->msgid == tx_msg->tx_msgid) { msg_found = 1; @@ -258,7 +261,7 @@ static void ipmi_msg_handler(struct ipmi_recv_msg *msg, void *user_msg_data) } } - mutex_unlock(&ipmi_device->tx_msg_lock); + spin_unlock_irqrestore(&ipmi_device->tx_msg_lock, flags); if (!msg_found) { dev_warn(&pnp_dev->dev, "Unexpected response (msg id %ld) is " "returned.\n", msg->msgid); @@ -378,6 +381,7 @@ acpi_ipmi_space_handler(u32 function, acpi_physical_address address, struct acpi_ipmi_device *ipmi_device = handler_context; int err, rem_time; acpi_status status; + unsigned long flags; /* * IPMI opregion message. * IPMI message is firstly written to the BMC and system software @@ -395,9 +399,9 @@ acpi_ipmi_space_handler(u32 function, acpi_physical_address address, return AE_NO_MEMORY; acpi_format_ipmi_msg(tx_msg, address, value); - mutex_lock(&ipmi_device->tx_msg_lock); + spin_lock_irqsave(&ipmi_device->tx_msg_lock, flags); list_add_tail(&tx_msg->head, &ipmi_device->tx_msg_list); - mutex_unlock(&ipmi_device->tx_msg_lock); + spin_unlock_irqrestore(&ipmi_device->tx_msg_lock, flags); err = ipmi_request_settime(ipmi_device->user_interface, &tx_msg->addr, tx_msg->tx_msgid, @@ -413,9 +417,9 @@ acpi_ipmi_space_handler(u32 function, acpi_physical_address address, status = AE_OK; end_label: - mutex_lock(&ipmi_device->tx_msg_lock); + spin_lock_irqsave(&ipmi_device->tx_msg_lock, flags); list_del(&tx_msg->head); - mutex_unlock(&ipmi_device->tx_msg_lock); + spin_unlock_irqrestore(&ipmi_device->tx_msg_lock, flags); kfree(tx_msg); return status; } @@ -457,7 +461,7 @@ static void acpi_add_ipmi_device(struct acpi_ipmi_device *ipmi_device) INIT_LIST_HEAD(&ipmi_device->head); - mutex_init(&ipmi_device->tx_msg_lock); + spin_lock_init(&ipmi_device->tx_msg_lock); INIT_LIST_HEAD(&ipmi_device->tx_msg_list); ipmi_install_space_handler(ipmi_device); diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index fbdb82e70d10..407ad13cac2f 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -968,7 +968,7 @@ int acpi_bus_get_device(acpi_handle handle, struct acpi_device **device) } return 0; } -EXPORT_SYMBOL_GPL(acpi_bus_get_device); +EXPORT_SYMBOL(acpi_bus_get_device); int acpi_device_add(struct acpi_device *device, void (*release)(struct device *)) @@ -1121,7 +1121,7 @@ int acpi_bus_register_driver(struct acpi_driver *driver) EXPORT_SYMBOL(acpi_bus_register_driver); /** - * acpi_bus_unregister_driver - unregisters a driver with the APIC bus + * acpi_bus_unregister_driver - unregisters a driver with the ACPI bus * @driver: driver to unregister * * Unregisters a driver with the ACPI bus. Searches the namespace for all diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c index 958ba2a420c3..97f4acb54ad6 100644 --- a/drivers/ata/sata_promise.c +++ b/drivers/ata/sata_promise.c @@ -2,7 +2,7 @@ * sata_promise.c - Promise SATA * * Maintained by: Tejun Heo <tj@kernel.org> - * Mikael Pettersson <mikpe@it.uu.se> + * Mikael Pettersson * Please ALWAYS copy linux-ide@vger.kernel.org * on emails. * diff --git a/drivers/base/core.c b/drivers/base/core.c index c7cfadcf6752..34abf4d8a45f 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2017,7 +2017,7 @@ EXPORT_SYMBOL_GPL(device_move); */ void device_shutdown(void) { - struct device *dev; + struct device *dev, *parent; spin_lock(&devices_kset->list_lock); /* @@ -2034,7 +2034,7 @@ void device_shutdown(void) * prevent it from being freed because parent's * lock is to be held */ - get_device(dev->parent); + parent = get_device(dev->parent); get_device(dev); /* * Make sure the device is off the kset list, in the @@ -2044,8 +2044,8 @@ void device_shutdown(void) spin_unlock(&devices_kset->list_lock); /* hold lock to avoid race with probe/release */ - if (dev->parent) - device_lock(dev->parent); + if (parent) + device_lock(parent); device_lock(dev); /* Don't allow any more runtime suspends */ @@ -2063,11 +2063,11 @@ void device_shutdown(void) } device_unlock(dev); - if (dev->parent) - device_unlock(dev->parent); + if (parent) + device_unlock(parent); put_device(dev); - put_device(dev->parent); + put_device(parent); spin_lock(&devices_kset->list_lock); } diff --git a/drivers/bcma/driver_pci.c b/drivers/bcma/driver_pci.c index c9fd6943ce45..50329d1057ed 100644 --- a/drivers/bcma/driver_pci.c +++ b/drivers/bcma/driver_pci.c @@ -210,25 +210,6 @@ static void bcma_core_pci_config_fixup(struct bcma_drv_pci *pc) } } -static void bcma_core_pci_power_save(struct bcma_drv_pci *pc, bool up) -{ - u16 data; - - if (pc->core->id.rev >= 15 && pc->core->id.rev <= 20) { - data = up ? 0x74 : 0x7C; - bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1, - BCMA_CORE_PCI_MDIO_BLK1_MGMT1, 0x7F64); - bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1, - BCMA_CORE_PCI_MDIO_BLK1_MGMT3, data); - } else if (pc->core->id.rev >= 21 && pc->core->id.rev <= 22) { - data = up ? 0x75 : 0x7D; - bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1, - BCMA_CORE_PCI_MDIO_BLK1_MGMT1, 0x7E65); - bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1, - BCMA_CORE_PCI_MDIO_BLK1_MGMT3, data); - } -} - /************************************************** * Init. **************************************************/ @@ -255,6 +236,32 @@ void bcma_core_pci_init(struct bcma_drv_pci *pc) bcma_core_pci_clientmode_init(pc); } +void bcma_core_pci_power_save(struct bcma_bus *bus, bool up) +{ + struct bcma_drv_pci *pc; + u16 data; + + if (bus->hosttype != BCMA_HOSTTYPE_PCI) + return; + + pc = &bus->drv_pci[0]; + + if (pc->core->id.rev >= 15 && pc->core->id.rev <= 20) { + data = up ? 0x74 : 0x7C; + bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1, + BCMA_CORE_PCI_MDIO_BLK1_MGMT1, 0x7F64); + bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1, + BCMA_CORE_PCI_MDIO_BLK1_MGMT3, data); + } else if (pc->core->id.rev >= 21 && pc->core->id.rev <= 22) { + data = up ? 0x75 : 0x7D; + bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1, + BCMA_CORE_PCI_MDIO_BLK1_MGMT1, 0x7E65); + bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1, + BCMA_CORE_PCI_MDIO_BLK1_MGMT3, data); + } +} +EXPORT_SYMBOL_GPL(bcma_core_pci_power_save); + int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc, struct bcma_device *core, bool enable) { @@ -310,8 +317,6 @@ void bcma_core_pci_up(struct bcma_bus *bus) pc = &bus->drv_pci[0]; - bcma_core_pci_power_save(pc, true); - bcma_core_pci_extend_L1timer(pc, true); } EXPORT_SYMBOL_GPL(bcma_core_pci_up); @@ -326,7 +331,5 @@ void bcma_core_pci_down(struct bcma_bus *bus) pc = &bus->drv_pci[0]; bcma_core_pci_extend_L1timer(pc, false); - - bcma_core_pci_power_save(pc, false); } EXPORT_SYMBOL_GPL(bcma_core_pci_down); diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index d2d95ff5353b..edfa2515bc86 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1189,6 +1189,7 @@ static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode, int err; u32 cp; + memset(&arg64, 0, sizeof(arg64)); err = 0; err |= copy_from_user(&arg64.LUN_info, &arg32->LUN_info, diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 639d26b90b91..2b9440384536 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -1193,6 +1193,7 @@ out_passthru: ida_pci_info_struct pciinfo; if (!arg) return -EINVAL; + memset(&pciinfo, 0, sizeof(pciinfo)); pciinfo.bus = host->pci_dev->bus->number; pciinfo.dev_fn = host->pci_dev->devfn; pciinfo.board_id = host->board_id; diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index a12b923bbaca..0a327f4154a2 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -85,6 +85,7 @@ static struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x04CA, 0x3008) }, { USB_DEVICE(0x13d3, 0x3362) }, { USB_DEVICE(0x0CF3, 0xE004) }, + { USB_DEVICE(0x0CF3, 0xE005) }, { USB_DEVICE(0x0930, 0x0219) }, { USB_DEVICE(0x0489, 0xe057) }, { USB_DEVICE(0x13d3, 0x3393) }, @@ -126,6 +127,7 @@ static struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3362), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe057), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3393), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 8e16f0af6358..f3dfc0a88fdc 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -102,6 +102,7 @@ static struct usb_device_id btusb_table[] = { /* Broadcom BCM20702A0 */ { USB_DEVICE(0x0b05, 0x17b5) }, + { USB_DEVICE(0x0b05, 0x17cb) }, { USB_DEVICE(0x04ca, 0x2003) }, { USB_DEVICE(0x0489, 0xe042) }, { USB_DEVICE(0x413c, 0x8197) }, @@ -112,6 +113,9 @@ static struct usb_device_id btusb_table[] = { /*Broadcom devices with vendor specific id */ { USB_VENDOR_AND_INTERFACE_INFO(0x0a5c, 0xff, 0x01, 0x01) }, + /* Belkin F8065bf - Broadcom based */ + { USB_VENDOR_AND_INTERFACE_INFO(0x050d, 0xff, 0x01, 0x01) }, + { } /* Terminating entry */ }; @@ -148,6 +152,7 @@ static struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3362), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe057), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3393), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bus/mvebu-mbus.c b/drivers/bus/mvebu-mbus.c index 19ab6ff53d59..2394e9753ef5 100644 --- a/drivers/bus/mvebu-mbus.c +++ b/drivers/bus/mvebu-mbus.c @@ -700,6 +700,7 @@ static int __init mvebu_mbus_common_init(struct mvebu_mbus_state *mbus, phys_addr_t sdramwins_phys_base, size_t sdramwins_size) { + struct device_node *np; int win; mbus->mbuswins_base = ioremap(mbuswins_phys_base, mbuswins_size); @@ -712,8 +713,11 @@ static int __init mvebu_mbus_common_init(struct mvebu_mbus_state *mbus, return -ENOMEM; } - if (of_find_compatible_node(NULL, NULL, "marvell,coherency-fabric")) + np = of_find_compatible_node(NULL, NULL, "marvell,coherency-fabric"); + if (np) { mbus->hw_io_coherency = 1; + of_node_put(np); + } for (win = 0; win < mbus->soc->num_wins; win++) mvebu_mbus_disable_window(mbus, win); @@ -861,11 +865,13 @@ static void __init mvebu_mbus_get_pcie_resources(struct device_node *np, int ret; /* - * These are optional, so we clear them and they'll - * be zero if they are missing from the DT. + * These are optional, so we make sure that resource_size(x) will + * return 0. */ memset(mem, 0, sizeof(struct resource)); + mem->end = -1; memset(io, 0, sizeof(struct resource)); + io->end = -1; ret = of_property_read_u32_array(np, "pcie-mem-aperture", reg, ARRAY_SIZE(reg)); if (!ret) { diff --git a/drivers/char/random.c b/drivers/char/random.c index 7737b5bd26af..7a744d391756 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -640,7 +640,7 @@ struct timer_rand_state { */ void add_device_randomness(const void *buf, unsigned int size) { - unsigned long time = get_cycles() ^ jiffies; + unsigned long time = random_get_entropy() ^ jiffies; mix_pool_bytes(&input_pool, buf, size, NULL); mix_pool_bytes(&input_pool, &time, sizeof(time), NULL); @@ -677,7 +677,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) goto out; sample.jiffies = jiffies; - sample.cycles = get_cycles(); + sample.cycles = random_get_entropy(); sample.num = num; mix_pool_bytes(&input_pool, &sample, sizeof(sample), NULL); @@ -744,7 +744,7 @@ void add_interrupt_randomness(int irq, int irq_flags) struct fast_pool *fast_pool = &__get_cpu_var(irq_randomness); struct pt_regs *regs = get_irq_regs(); unsigned long now = jiffies; - __u32 input[4], cycles = get_cycles(); + __u32 input[4], cycles = random_get_entropy(); input[0] = cycles ^ jiffies; input[1] = irq; @@ -1459,12 +1459,11 @@ struct ctl_table random_table[] = { static u32 random_int_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; -static int __init random_int_secret_init(void) +int random_int_secret_init(void) { get_random_bytes(random_int_secret, sizeof(random_int_secret)); return 0; } -late_initcall(random_int_secret_init); /* * Get a random word for internal kernel use only. Similar to urandom but @@ -1483,7 +1482,7 @@ unsigned int get_random_int(void) hash = get_cpu_var(get_random_int_hash); - hash[0] += current->pid + jiffies + get_cycles(); + hash[0] += current->pid + jiffies + random_get_entropy(); md5_transform(hash, random_int_secret); ret = hash[0]; put_cpu_var(get_random_int_hash); diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c index 7a7929ba2658..94c280d36e8b 100644 --- a/drivers/char/tpm/xen-tpmfront.c +++ b/drivers/char/tpm/xen-tpmfront.c @@ -10,6 +10,7 @@ #include <linux/errno.h> #include <linux/err.h> #include <linux/interrupt.h> +#include <xen/xen.h> #include <xen/events.h> #include <xen/interface/io/tpmif.h> #include <xen/grant_table.h> @@ -142,32 +143,6 @@ static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count) return length; } -ssize_t tpm_show_locality(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct tpm_chip *chip = dev_get_drvdata(dev); - struct tpm_private *priv = TPM_VPRIV(chip); - u8 locality = priv->shr->locality; - - return sprintf(buf, "%d\n", locality); -} - -ssize_t tpm_store_locality(struct device *dev, struct device_attribute *attr, - const char *buf, size_t len) -{ - struct tpm_chip *chip = dev_get_drvdata(dev); - struct tpm_private *priv = TPM_VPRIV(chip); - u8 val; - - int rv = kstrtou8(buf, 0, &val); - if (rv) - return rv; - - priv->shr->locality = val; - - return len; -} - static const struct file_operations vtpm_ops = { .owner = THIS_MODULE, .llseek = no_llseek, @@ -188,8 +163,6 @@ static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL); static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel); static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL); static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL); -static DEVICE_ATTR(locality, S_IRUGO | S_IWUSR, tpm_show_locality, - tpm_store_locality); static struct attribute *vtpm_attrs[] = { &dev_attr_pubek.attr, @@ -202,7 +175,6 @@ static struct attribute *vtpm_attrs[] = { &dev_attr_cancel.attr, &dev_attr_durations.attr, &dev_attr_timeouts.attr, - &dev_attr_locality.attr, NULL, }; @@ -210,8 +182,6 @@ static struct attribute_group vtpm_attr_grp = { .attrs = vtpm_attrs, }; -#define TPM_LONG_TIMEOUT (10 * 60 * HZ) - static const struct tpm_vendor_specific tpm_vtpm = { .status = vtpm_status, .recv = vtpm_recv, @@ -224,11 +194,6 @@ static const struct tpm_vendor_specific tpm_vtpm = { .miscdev = { .fops = &vtpm_ops, }, - .duration = { - TPM_LONG_TIMEOUT, - TPM_LONG_TIMEOUT, - TPM_LONG_TIMEOUT, - }, }; static irqreturn_t tpmif_interrupt(int dummy, void *dev_id) diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 41c69469ce20..971d796e071d 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -26,6 +26,7 @@ config DW_APB_TIMER_OF config ARMADA_370_XP_TIMER bool + select CLKSRC_OF config ORION_TIMER select CLKSRC_OF diff --git a/drivers/clocksource/clksrc-of.c b/drivers/clocksource/clksrc-of.c index 37f5325bec95..b9ddd9e3a2f5 100644 --- a/drivers/clocksource/clksrc-of.c +++ b/drivers/clocksource/clksrc-of.c @@ -30,6 +30,9 @@ void __init clocksource_of_init(void) clocksource_of_init_fn init_func; for_each_matching_node_and_match(np, __clksrc_of_table, &match) { + if (!of_device_is_available(np)) + continue; + init_func = match->data; init_func(np); } diff --git a/drivers/clocksource/em_sti.c b/drivers/clocksource/em_sti.c index b9c81b7c3a3b..3a5909c12d42 100644 --- a/drivers/clocksource/em_sti.c +++ b/drivers/clocksource/em_sti.c @@ -301,7 +301,7 @@ static void em_sti_register_clockevent(struct em_sti_priv *p) ced->name = dev_name(&p->pdev->dev); ced->features = CLOCK_EVT_FEAT_ONESHOT; ced->rating = 200; - ced->cpumask = cpumask_of(0); + ced->cpumask = cpu_possible_mask; ced->set_next_event = em_sti_clock_event_next; ced->set_mode = em_sti_clock_event_mode; diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index 5b34768f4d7c..62b0de6a1837 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -428,7 +428,6 @@ static int exynos4_local_timer_setup(struct clock_event_device *evt) evt->irq); return -EIO; } - irq_set_affinity(evt->irq, cpumask_of(cpu)); } else { enable_percpu_irq(mct_irqs[MCT_L0_IRQ], 0); } @@ -449,6 +448,7 @@ static int exynos4_mct_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { struct mct_clock_event_device *mevt; + unsigned int cpu; /* * Grab cpu pointer in each case to avoid spurious @@ -459,6 +459,12 @@ static int exynos4_mct_cpu_notify(struct notifier_block *self, mevt = this_cpu_ptr(&percpu_mct_tick); exynos4_local_timer_setup(&mevt->evt); break; + case CPU_ONLINE: + cpu = (unsigned long)hcpu; + if (mct_int_type == MCT_INT_SPI) + irq_set_affinity(mct_irqs[MCT_L0_IRQ + cpu], + cpumask_of(cpu)); + break; case CPU_DYING: mevt = this_cpu_ptr(&percpu_mct_tick); exynos4_local_timer_stop(&mevt->evt); @@ -500,6 +506,8 @@ static void __init exynos4_timer_resources(struct device_node *np, void __iomem &percpu_mct_tick); WARN(err, "MCT: can't request IRQ %d (%d)\n", mct_irqs[MCT_L0_IRQ], err); + } else { + irq_set_affinity(mct_irqs[MCT_L0_IRQ], cpumask_of(0)); } err = register_cpu_notifier(&exynos4_mct_cpu_nb); diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index a1260b4549db..d2c3253e015e 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -986,6 +986,10 @@ static int __init acpi_cpufreq_init(void) { int ret; + /* don't keep reloading if cpufreq_driver exists */ + if (cpufreq_get_current_driver()) + return 0; + if (acpi_disabled) return 0; diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c index 78c49d8e0f4a..c522a95c0e16 100644 --- a/drivers/cpufreq/cpufreq-cpu0.c +++ b/drivers/cpufreq/cpufreq-cpu0.c @@ -229,7 +229,7 @@ static int cpu0_cpufreq_probe(struct platform_device *pdev) if (of_property_read_u32(np, "clock-latency", &transition_latency)) transition_latency = CPUFREQ_ETERNAL; - if (cpu_reg) { + if (!IS_ERR(cpu_reg)) { struct opp *opp; unsigned long min_uV, max_uV; int i; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 89b3c52cd5c3..04548f7023af 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1460,6 +1460,9 @@ unsigned int cpufreq_get(unsigned int cpu) { unsigned int ret_freq = 0; + if (cpufreq_disabled() || !cpufreq_driver) + return -ENOENT; + if (!down_read_trylock(&cpufreq_rwsem)) return 0; diff --git a/drivers/cpufreq/exynos5440-cpufreq.c b/drivers/cpufreq/exynos5440-cpufreq.c index d514c152fd1a..be5380ecdcd4 100644 --- a/drivers/cpufreq/exynos5440-cpufreq.c +++ b/drivers/cpufreq/exynos5440-cpufreq.c @@ -457,7 +457,7 @@ err_free_table: opp_free_cpufreq_table(dvfs_info->dev, &dvfs_info->freq_table); err_put_node: of_node_put(np); - dev_err(dvfs_info->dev, "%s: failed initialization\n", __func__); + dev_err(&pdev->dev, "%s: failed initialization\n", __func__); return ret; } diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 9733f29ed148..32b3479a2405 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -394,7 +394,10 @@ static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) trace_cpu_frequency(pstate * 100000, cpu->cpu); cpu->pstate.current_pstate = pstate; - wrmsrl(MSR_IA32_PERF_CTL, pstate << 8); + if (limits.no_turbo) + wrmsrl(MSR_IA32_PERF_CTL, BIT(32) | (pstate << 8)); + else + wrmsrl(MSR_IA32_PERF_CTL, pstate << 8); } diff --git a/drivers/cpufreq/spear-cpufreq.c b/drivers/cpufreq/spear-cpufreq.c index 19e364fa5955..3f418166ce02 100644 --- a/drivers/cpufreq/spear-cpufreq.c +++ b/drivers/cpufreq/spear-cpufreq.c @@ -113,7 +113,7 @@ static int spear_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { struct cpufreq_freqs freqs; - unsigned long newfreq; + long newfreq; struct clk *srcclk; int index, ret, mult = 1; diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 526ec77c7ba0..f238cfd33847 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -198,6 +198,7 @@ config TI_EDMA depends on ARCH_DAVINCI || ARCH_OMAP select DMA_ENGINE select DMA_VIRTUAL_CHANNELS + select TI_PRIV_EDMA default n help Enable support for the TI EDMA controller. This DMA diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index ff50ff4c6a57..3519111c566b 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -306,6 +306,7 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg( EDMA_SLOT_ANY); if (echan->slot[i] < 0) { dev_err(dev, "Failed to allocate slot\n"); + kfree(edesc); return NULL; } } @@ -749,6 +750,6 @@ static void __exit edma_exit(void) } module_exit(edma_exit); -MODULE_AUTHOR("Matt Porter <mporter@ti.com>"); +MODULE_AUTHOR("Matt Porter <matt.porter@linaro.org>"); MODULE_DESCRIPTION("TI EDMA DMA engine driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index 78f8ca5fccee..55852c026791 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -437,17 +437,18 @@ static void dma_irq_handle_channel(struct imxdma_channel *imxdmac) struct imxdma_engine *imxdma = imxdmac->imxdma; int chno = imxdmac->channel; struct imxdma_desc *desc; + unsigned long flags; - spin_lock(&imxdma->lock); + spin_lock_irqsave(&imxdma->lock, flags); if (list_empty(&imxdmac->ld_active)) { - spin_unlock(&imxdma->lock); + spin_unlock_irqrestore(&imxdma->lock, flags); goto out; } desc = list_first_entry(&imxdmac->ld_active, struct imxdma_desc, node); - spin_unlock(&imxdma->lock); + spin_unlock_irqrestore(&imxdma->lock, flags); if (desc->sg) { u32 tmp; @@ -519,7 +520,6 @@ static int imxdma_xfer_desc(struct imxdma_desc *d) { struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan); struct imxdma_engine *imxdma = imxdmac->imxdma; - unsigned long flags; int slot = -1; int i; @@ -527,7 +527,6 @@ static int imxdma_xfer_desc(struct imxdma_desc *d) switch (d->type) { case IMXDMA_DESC_INTERLEAVED: /* Try to get a free 2D slot */ - spin_lock_irqsave(&imxdma->lock, flags); for (i = 0; i < IMX_DMA_2D_SLOTS; i++) { if ((imxdma->slots_2d[i].count > 0) && ((imxdma->slots_2d[i].xsr != d->x) || @@ -537,10 +536,8 @@ static int imxdma_xfer_desc(struct imxdma_desc *d) slot = i; break; } - if (slot < 0) { - spin_unlock_irqrestore(&imxdma->lock, flags); + if (slot < 0) return -EBUSY; - } imxdma->slots_2d[slot].xsr = d->x; imxdma->slots_2d[slot].ysr = d->y; @@ -549,7 +546,6 @@ static int imxdma_xfer_desc(struct imxdma_desc *d) imxdmac->slot_2d = slot; imxdmac->enabled_2d = true; - spin_unlock_irqrestore(&imxdma->lock, flags); if (slot == IMX_DMA_2D_SLOT_A) { d->config_mem &= ~CCR_MSEL_B; @@ -625,18 +621,17 @@ static void imxdma_tasklet(unsigned long data) struct imxdma_channel *imxdmac = (void *)data; struct imxdma_engine *imxdma = imxdmac->imxdma; struct imxdma_desc *desc; + unsigned long flags; - spin_lock(&imxdma->lock); + spin_lock_irqsave(&imxdma->lock, flags); if (list_empty(&imxdmac->ld_active)) { /* Someone might have called terminate all */ - goto out; + spin_unlock_irqrestore(&imxdma->lock, flags); + return; } desc = list_first_entry(&imxdmac->ld_active, struct imxdma_desc, node); - if (desc->desc.callback) - desc->desc.callback(desc->desc.callback_param); - /* If we are dealing with a cyclic descriptor, keep it on ld_active * and dont mark the descriptor as complete. * Only in non-cyclic cases it would be marked as complete @@ -663,7 +658,11 @@ static void imxdma_tasklet(unsigned long data) __func__, imxdmac->channel); } out: - spin_unlock(&imxdma->lock); + spin_unlock_irqrestore(&imxdma->lock, flags); + + if (desc->desc.callback) + desc->desc.callback(desc->desc.callback_param); + } static int imxdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, @@ -883,7 +882,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic( kfree(imxdmac->sg_list); imxdmac->sg_list = kcalloc(periods + 1, - sizeof(struct scatterlist), GFP_KERNEL); + sizeof(struct scatterlist), GFP_ATOMIC); if (!imxdmac->sg_list) return NULL; diff --git a/drivers/dma/sh/rcar-hpbdma.c b/drivers/dma/sh/rcar-hpbdma.c index 45a520281ce1..ebad84591a6e 100644 --- a/drivers/dma/sh/rcar-hpbdma.c +++ b/drivers/dma/sh/rcar-hpbdma.c @@ -93,6 +93,7 @@ struct hpb_dmae_chan { void __iomem *base; const struct hpb_dmae_slave_config *cfg; char dev_id[16]; /* unique name per DMAC of channel */ + dma_addr_t slave_addr; }; struct hpb_dmae_device { @@ -432,7 +433,6 @@ hpb_dmae_alloc_chan_resources(struct hpb_dmae_chan *hpb_chan, hpb_chan->xfer_mode = XFER_DOUBLE; } else { dev_err(hpb_chan->shdma_chan.dev, "DCR setting error"); - shdma_free_irq(&hpb_chan->shdma_chan); return -EINVAL; } @@ -446,7 +446,8 @@ hpb_dmae_alloc_chan_resources(struct hpb_dmae_chan *hpb_chan, return 0; } -static int hpb_dmae_set_slave(struct shdma_chan *schan, int slave_id, bool try) +static int hpb_dmae_set_slave(struct shdma_chan *schan, int slave_id, + dma_addr_t slave_addr, bool try) { struct hpb_dmae_chan *chan = to_chan(schan); const struct hpb_dmae_slave_config *sc = @@ -457,6 +458,7 @@ static int hpb_dmae_set_slave(struct shdma_chan *schan, int slave_id, bool try) if (try) return 0; chan->cfg = sc; + chan->slave_addr = slave_addr ? : sc->addr; return hpb_dmae_alloc_chan_resources(chan, sc); } @@ -468,7 +470,7 @@ static dma_addr_t hpb_dmae_slave_addr(struct shdma_chan *schan) { struct hpb_dmae_chan *chan = to_chan(schan); - return chan->cfg->addr; + return chan->slave_addr; } static struct shdma_desc *hpb_dmae_embedded_desc(void *buf, int i) @@ -614,7 +616,6 @@ static void hpb_dmae_chan_remove(struct hpb_dmae_device *hpbdev) shdma_for_each_chan(schan, &hpbdev->shdma_dev, i) { BUG_ON(!schan); - shdma_free_irq(schan); shdma_chan_remove(schan); } dma_dev->chancnt = 0; diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 0ff43552d472..89675f862308 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -63,6 +63,7 @@ struct gpio_bank { struct gpio_chip chip; struct clk *dbck; u32 mod_usage; + u32 irq_usage; u32 dbck_enable_mask; bool dbck_enabled; struct device *dev; @@ -86,6 +87,9 @@ struct gpio_bank { #define GPIO_BIT(bank, gpio) (1 << GPIO_INDEX(bank, gpio)) #define GPIO_MOD_CTRL_BIT BIT(0) +#define BANK_USED(bank) (bank->mod_usage || bank->irq_usage) +#define LINE_USED(line, offset) (line & (1 << offset)) + static int irq_to_gpio(struct gpio_bank *bank, unsigned int gpio_irq) { return bank->chip.base + gpio_irq; @@ -420,15 +424,69 @@ static int _set_gpio_triggering(struct gpio_bank *bank, int gpio, return 0; } +static void _enable_gpio_module(struct gpio_bank *bank, unsigned offset) +{ + if (bank->regs->pinctrl) { + void __iomem *reg = bank->base + bank->regs->pinctrl; + + /* Claim the pin for MPU */ + __raw_writel(__raw_readl(reg) | (1 << offset), reg); + } + + if (bank->regs->ctrl && !BANK_USED(bank)) { + void __iomem *reg = bank->base + bank->regs->ctrl; + u32 ctrl; + + ctrl = __raw_readl(reg); + /* Module is enabled, clocks are not gated */ + ctrl &= ~GPIO_MOD_CTRL_BIT; + __raw_writel(ctrl, reg); + bank->context.ctrl = ctrl; + } +} + +static void _disable_gpio_module(struct gpio_bank *bank, unsigned offset) +{ + void __iomem *base = bank->base; + + if (bank->regs->wkup_en && + !LINE_USED(bank->mod_usage, offset) && + !LINE_USED(bank->irq_usage, offset)) { + /* Disable wake-up during idle for dynamic tick */ + _gpio_rmw(base, bank->regs->wkup_en, 1 << offset, 0); + bank->context.wake_en = + __raw_readl(bank->base + bank->regs->wkup_en); + } + + if (bank->regs->ctrl && !BANK_USED(bank)) { + void __iomem *reg = bank->base + bank->regs->ctrl; + u32 ctrl; + + ctrl = __raw_readl(reg); + /* Module is disabled, clocks are gated */ + ctrl |= GPIO_MOD_CTRL_BIT; + __raw_writel(ctrl, reg); + bank->context.ctrl = ctrl; + } +} + +static int gpio_is_input(struct gpio_bank *bank, int mask) +{ + void __iomem *reg = bank->base + bank->regs->direction; + + return __raw_readl(reg) & mask; +} + static int gpio_irq_type(struct irq_data *d, unsigned type) { struct gpio_bank *bank = irq_data_get_irq_chip_data(d); unsigned gpio = 0; int retval; unsigned long flags; + unsigned offset; - if (WARN_ON(!bank->mod_usage)) - return -EINVAL; + if (!BANK_USED(bank)) + pm_runtime_get_sync(bank->dev); #ifdef CONFIG_ARCH_OMAP1 if (d->irq > IH_MPUIO_BASE) @@ -446,7 +504,17 @@ static int gpio_irq_type(struct irq_data *d, unsigned type) return -EINVAL; spin_lock_irqsave(&bank->lock, flags); - retval = _set_gpio_triggering(bank, GPIO_INDEX(bank, gpio), type); + offset = GPIO_INDEX(bank, gpio); + retval = _set_gpio_triggering(bank, offset, type); + if (!LINE_USED(bank->mod_usage, offset)) { + _enable_gpio_module(bank, offset); + _set_gpio_direction(bank, offset, 1); + } else if (!gpio_is_input(bank, 1 << offset)) { + spin_unlock_irqrestore(&bank->lock, flags); + return -EINVAL; + } + + bank->irq_usage |= 1 << GPIO_INDEX(bank, gpio); spin_unlock_irqrestore(&bank->lock, flags); if (type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH)) @@ -603,35 +671,19 @@ static int omap_gpio_request(struct gpio_chip *chip, unsigned offset) * If this is the first gpio_request for the bank, * enable the bank module. */ - if (!bank->mod_usage) + if (!BANK_USED(bank)) pm_runtime_get_sync(bank->dev); spin_lock_irqsave(&bank->lock, flags); /* Set trigger to none. You need to enable the desired trigger with - * request_irq() or set_irq_type(). + * request_irq() or set_irq_type(). Only do this if the IRQ line has + * not already been requested. */ - _set_gpio_triggering(bank, offset, IRQ_TYPE_NONE); - - if (bank->regs->pinctrl) { - void __iomem *reg = bank->base + bank->regs->pinctrl; - - /* Claim the pin for MPU */ - __raw_writel(__raw_readl(reg) | (1 << offset), reg); - } - - if (bank->regs->ctrl && !bank->mod_usage) { - void __iomem *reg = bank->base + bank->regs->ctrl; - u32 ctrl; - - ctrl = __raw_readl(reg); - /* Module is enabled, clocks are not gated */ - ctrl &= ~GPIO_MOD_CTRL_BIT; - __raw_writel(ctrl, reg); - bank->context.ctrl = ctrl; + if (!LINE_USED(bank->irq_usage, offset)) { + _set_gpio_triggering(bank, offset, IRQ_TYPE_NONE); + _enable_gpio_module(bank, offset); } - bank->mod_usage |= 1 << offset; - spin_unlock_irqrestore(&bank->lock, flags); return 0; @@ -640,31 +692,11 @@ static int omap_gpio_request(struct gpio_chip *chip, unsigned offset) static void omap_gpio_free(struct gpio_chip *chip, unsigned offset) { struct gpio_bank *bank = container_of(chip, struct gpio_bank, chip); - void __iomem *base = bank->base; unsigned long flags; spin_lock_irqsave(&bank->lock, flags); - - if (bank->regs->wkup_en) { - /* Disable wake-up during idle for dynamic tick */ - _gpio_rmw(base, bank->regs->wkup_en, 1 << offset, 0); - bank->context.wake_en = - __raw_readl(bank->base + bank->regs->wkup_en); - } - bank->mod_usage &= ~(1 << offset); - - if (bank->regs->ctrl && !bank->mod_usage) { - void __iomem *reg = bank->base + bank->regs->ctrl; - u32 ctrl; - - ctrl = __raw_readl(reg); - /* Module is disabled, clocks are gated */ - ctrl |= GPIO_MOD_CTRL_BIT; - __raw_writel(ctrl, reg); - bank->context.ctrl = ctrl; - } - + _disable_gpio_module(bank, offset); _reset_gpio(bank, bank->chip.base + offset); spin_unlock_irqrestore(&bank->lock, flags); @@ -672,7 +704,7 @@ static void omap_gpio_free(struct gpio_chip *chip, unsigned offset) * If this is the last gpio to be freed in the bank, * disable the bank module. */ - if (!bank->mod_usage) + if (!BANK_USED(bank)) pm_runtime_put(bank->dev); } @@ -762,10 +794,20 @@ static void gpio_irq_shutdown(struct irq_data *d) struct gpio_bank *bank = irq_data_get_irq_chip_data(d); unsigned int gpio = irq_to_gpio(bank, d->hwirq); unsigned long flags; + unsigned offset = GPIO_INDEX(bank, gpio); spin_lock_irqsave(&bank->lock, flags); + bank->irq_usage &= ~(1 << offset); + _disable_gpio_module(bank, offset); _reset_gpio(bank, gpio); spin_unlock_irqrestore(&bank->lock, flags); + + /* + * If this is the last IRQ to be freed in the bank, + * disable the bank module. + */ + if (!BANK_USED(bank)) + pm_runtime_put(bank->dev); } static void gpio_ack_irq(struct irq_data *d) @@ -897,13 +939,6 @@ static int gpio_input(struct gpio_chip *chip, unsigned offset) return 0; } -static int gpio_is_input(struct gpio_bank *bank, int mask) -{ - void __iomem *reg = bank->base + bank->regs->direction; - - return __raw_readl(reg) & mask; -} - static int gpio_get(struct gpio_chip *chip, unsigned offset) { struct gpio_bank *bank; @@ -922,13 +957,22 @@ static int gpio_output(struct gpio_chip *chip, unsigned offset, int value) { struct gpio_bank *bank; unsigned long flags; + int retval = 0; bank = container_of(chip, struct gpio_bank, chip); spin_lock_irqsave(&bank->lock, flags); + + if (LINE_USED(bank->irq_usage, offset)) { + retval = -EINVAL; + goto exit; + } + bank->set_dataout(bank, offset, value); _set_gpio_direction(bank, offset, 0); + +exit: spin_unlock_irqrestore(&bank->lock, flags); - return 0; + return retval; } static int gpio_debounce(struct gpio_chip *chip, unsigned offset, @@ -1400,7 +1444,7 @@ void omap2_gpio_prepare_for_idle(int pwr_mode) struct gpio_bank *bank; list_for_each_entry(bank, &omap_gpio_list, node) { - if (!bank->mod_usage || !bank->loses_context) + if (!BANK_USED(bank) || !bank->loses_context) continue; bank->power_mode = pwr_mode; @@ -1414,7 +1458,7 @@ void omap2_gpio_resume_after_idle(void) struct gpio_bank *bank; list_for_each_entry(bank, &omap_gpio_list, node) { - if (!bank->mod_usage || !bank->loses_context) + if (!BANK_USED(bank) || !bank->loses_context) continue; pm_runtime_get_sync(bank->dev); diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c index e3745eb07570..6038966ab045 100644 --- a/drivers/gpio/gpio-rcar.c +++ b/drivers/gpio/gpio-rcar.c @@ -293,10 +293,9 @@ static void gpio_rcar_parse_pdata(struct gpio_rcar_priv *p) if (pdata) { p->config = *pdata; } else if (IS_ENABLED(CONFIG_OF) && np) { - ret = of_parse_phandle_with_args(np, "gpio-ranges", - "#gpio-range-cells", 0, &args); - p->config.number_of_pins = ret == 0 && args.args_count == 3 - ? args.args[2] + ret = of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, 0, + &args); + p->config.number_of_pins = ret == 0 ? args.args[2] : RCAR_MAX_GPIO_PER_BANK; p->config.gpio_base = -1; } diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 1688ff500513..830f7501cb4d 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -2925,6 +2925,8 @@ int drm_edid_to_speaker_allocation(struct edid *edid, u8 **sadb) /* Speaker Allocation Data Block */ if (dbl == 3) { *sadb = kmalloc(dbl, GFP_KERNEL); + if (!*sadb) + return -ENOMEM; memcpy(*sadb, &db[1], dbl); count = dbl; break; diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index f6f6cc7fc133..3d13ca6e257f 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -416,14 +416,6 @@ static void drm_fb_helper_dpms(struct fb_info *info, int dpms_mode) return; /* - * fbdev->blank can be called from irq context in case of a panic. - * Since we already have our own special panic handler which will - * restore the fbdev console mode completely, just bail out early. - */ - if (oops_in_progress) - return; - - /* * For each CRTC in this fb, turn the connectors on/off. */ drm_modeset_lock_all(dev); diff --git a/drivers/gpu/drm/gma500/gtt.c b/drivers/gpu/drm/gma500/gtt.c index 92babac362ec..2db731f00930 100644 --- a/drivers/gpu/drm/gma500/gtt.c +++ b/drivers/gpu/drm/gma500/gtt.c @@ -204,6 +204,7 @@ static int psb_gtt_attach_pages(struct gtt_range *gt) if (IS_ERR(pages)) return PTR_ERR(pages); + gt->npage = gt->gem.size / PAGE_SIZE; gt->pages = pages; return 0; diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c index b1f8fc69023f..60e84043aa34 100644 --- a/drivers/gpu/drm/i2c/tda998x_drv.c +++ b/drivers/gpu/drm/i2c/tda998x_drv.c @@ -707,8 +707,7 @@ tda998x_encoder_dpms(struct drm_encoder *encoder, int mode) reg_write(encoder, REG_VIP_CNTRL_2, priv->vip_cntrl_2); break; case DRM_MODE_DPMS_OFF: - /* disable audio and video ports */ - reg_write(encoder, REG_ENA_AP, 0x00); + /* disable video ports */ reg_write(encoder, REG_ENA_VP_0, 0x00); reg_write(encoder, REG_ENA_VP_1, 0x00); reg_write(encoder, REG_ENA_VP_2, 0x00); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index c27a21034a5e..d5c784d48671 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1290,12 +1290,9 @@ static int i915_load_modeset_init(struct drm_device *dev) * then we do not take part in VGA arbitration and the * vga_client_register() fails with -ENODEV. */ - if (!HAS_PCH_SPLIT(dev)) { - ret = vga_client_register(dev->pdev, dev, NULL, - i915_vga_set_decode); - if (ret && ret != -ENODEV) - goto out; - } + ret = vga_client_register(dev->pdev, dev, NULL, i915_vga_set_decode); + if (ret && ret != -ENODEV) + goto out; intel_register_dsm_handler(); @@ -1351,12 +1348,6 @@ static int i915_load_modeset_init(struct drm_device *dev) */ intel_fbdev_initial_config(dev); - /* - * Must do this after fbcon init so that - * vgacon_save_screen() works during the handover. - */ - i915_disable_vga_mem(dev); - /* Only enable hotplug handling once the fbdev is fully set up. */ dev_priv->enable_hotplug_processing = true; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index df9253d890ee..cdfb9da0e4ce 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4800,10 +4800,10 @@ i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc) if (!mutex_trylock(&dev->struct_mutex)) { if (!mutex_is_locked_by(&dev->struct_mutex, current)) - return SHRINK_STOP; + return 0; if (dev_priv->mm.shrinker_no_lock_stealing) - return SHRINK_STOP; + return 0; unlock = false; } @@ -4901,10 +4901,10 @@ i915_gem_inactive_scan(struct shrinker *shrinker, struct shrink_control *sc) if (!mutex_trylock(&dev->struct_mutex)) { if (!mutex_is_locked_by(&dev->struct_mutex, current)) - return 0; + return SHRINK_STOP; if (dev_priv->mm.shrinker_no_lock_stealing) - return 0; + return SHRINK_STOP; unlock = false; } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index aba9d7498996..dae364f0028c 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -143,8 +143,10 @@ static void i915_error_vprintf(struct drm_i915_error_state_buf *e, /* Seek the first printf which is hits start position */ if (e->pos < e->start) { - len = vsnprintf(NULL, 0, f, args); - if (!__i915_error_seek(e, len)) + va_list tmp; + + va_copy(tmp, args); + if (!__i915_error_seek(e, vsnprintf(NULL, 0, f, tmp))) return; } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c159e1a6810f..38f96f65d87a 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3881,6 +3881,9 @@ #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG 0x9030 #define GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB (1<<11) +#define HSW_SCRATCH1 0xb038 +#define HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE (1<<27) + #define HSW_FUSE_STRAP 0x42014 #define HSW_CDCLK_LIMIT (1 << 24) @@ -4728,6 +4731,9 @@ #define GEN7_ROW_CHICKEN2_GT2 0xf4f4 #define DOP_CLOCK_GATING_DISABLE (1<<0) +#define HSW_ROW_CHICKEN3 0xe49c +#define HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE (1 << 6) + #define G4X_AUD_VID_DID (dev_priv->info->display_mmio_offset + 0x62020) #define INTEL_AUDIO_DEVCL 0x808629FB #define INTEL_AUDIO_DEVBLC 0x80862801 diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index d8a1d98693e7..581fb4b2f766 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3941,8 +3941,6 @@ static void intel_connector_check_state(struct intel_connector *connector) * consider. */ void intel_connector_dpms(struct drm_connector *connector, int mode) { - struct intel_encoder *encoder = intel_attached_encoder(connector); - /* All the simple cases only support two dpms states. */ if (mode != DRM_MODE_DPMS_ON) mode = DRM_MODE_DPMS_OFF; @@ -3953,10 +3951,8 @@ void intel_connector_dpms(struct drm_connector *connector, int mode) connector->dpms = mode; /* Only need to change hw state when actually enabled */ - if (encoder->base.crtc) - intel_encoder_dpms(encoder, mode); - else - WARN_ON(encoder->connectors_active != false); + if (connector->encoder) + intel_encoder_dpms(to_intel_encoder(connector->encoder), mode); intel_modeset_check_state(connector->dev); } @@ -4775,6 +4771,10 @@ static void i9xx_set_pipeconf(struct intel_crtc *intel_crtc) pipeconf = 0; + if (dev_priv->quirks & QUIRK_PIPEA_FORCE && + I915_READ(PIPECONF(intel_crtc->pipe)) & PIPECONF_ENABLE) + pipeconf |= PIPECONF_ENABLE; + if (intel_crtc->pipe == 0 && INTEL_INFO(dev)->gen < 4) { /* Enable pixel doubling when the dot clock is > 90% of the (display) * core speed. @@ -10045,33 +10045,6 @@ static void i915_disable_vga(struct drm_device *dev) POSTING_READ(vga_reg); } -static void i915_enable_vga_mem(struct drm_device *dev) -{ - /* Enable VGA memory on Intel HD */ - if (HAS_PCH_SPLIT(dev)) { - vga_get_uninterruptible(dev->pdev, VGA_RSRC_LEGACY_IO); - outb(inb(VGA_MSR_READ) | VGA_MSR_MEM_EN, VGA_MSR_WRITE); - vga_set_legacy_decoding(dev->pdev, VGA_RSRC_LEGACY_IO | - VGA_RSRC_LEGACY_MEM | - VGA_RSRC_NORMAL_IO | - VGA_RSRC_NORMAL_MEM); - vga_put(dev->pdev, VGA_RSRC_LEGACY_IO); - } -} - -void i915_disable_vga_mem(struct drm_device *dev) -{ - /* Disable VGA memory on Intel HD */ - if (HAS_PCH_SPLIT(dev)) { - vga_get_uninterruptible(dev->pdev, VGA_RSRC_LEGACY_IO); - outb(inb(VGA_MSR_READ) & ~VGA_MSR_MEM_EN, VGA_MSR_WRITE); - vga_set_legacy_decoding(dev->pdev, VGA_RSRC_LEGACY_IO | - VGA_RSRC_NORMAL_IO | - VGA_RSRC_NORMAL_MEM); - vga_put(dev->pdev, VGA_RSRC_LEGACY_IO); - } -} - void intel_modeset_init_hw(struct drm_device *dev) { intel_init_power_well(dev); @@ -10350,7 +10323,6 @@ void i915_redisable_vga(struct drm_device *dev) if (I915_READ(vga_reg) != VGA_DISP_DISABLE) { DRM_DEBUG_KMS("Something enabled VGA plane, disabling it\n"); i915_disable_vga(dev); - i915_disable_vga_mem(dev); } } @@ -10564,8 +10536,6 @@ void intel_modeset_cleanup(struct drm_device *dev) intel_disable_fbc(dev); - i915_enable_vga_mem(dev); - intel_disable_gt_powersave(dev); ironlake_teardown_rc6(dev); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 2151d13772b8..2c555f91bfae 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -588,7 +588,18 @@ intel_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode, DRM_DEBUG_KMS("aux_ch native nack\n"); return -EREMOTEIO; case AUX_NATIVE_REPLY_DEFER: - udelay(100); + /* + * For now, just give more slack to branch devices. We + * could check the DPCD for I2C bit rate capabilities, + * and if available, adjust the interval. We could also + * be more careful with DP-to-Legacy adapters where a + * long legacy cable may force very low I2C bit rates. + */ + if (intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] & + DP_DWN_STRM_PORT_PRESENT) + usleep_range(500, 600); + else + usleep_range(300, 400); continue; default: DRM_ERROR("aux_ch invalid native reply 0x%02x\n", @@ -1456,7 +1467,7 @@ static void intel_edp_psr_setup(struct intel_dp *intel_dp) /* Avoid continuous PSR exit by masking memup and hpd */ I915_WRITE(EDP_PSR_DEBUG_CTL, EDP_PSR_DEBUG_MASK_MEMUP | - EDP_PSR_DEBUG_MASK_HPD); + EDP_PSR_DEBUG_MASK_HPD | EDP_PSR_DEBUG_MASK_LPSP); intel_dp->psr_setup_done = true; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 28cae80495e2..9b7b68fd5d47 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -793,6 +793,5 @@ extern void hsw_pc8_disable_interrupts(struct drm_device *dev); extern void hsw_pc8_restore_interrupts(struct drm_device *dev); extern void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv); extern void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv); -extern void i915_disable_vga_mem(struct drm_device *dev); #endif /* __INTEL_DRV_H__ */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index dd176b7296c1..f4c5e95b2d6f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3864,8 +3864,6 @@ static void valleyview_enable_rps(struct drm_device *dev) dev_priv->rps.rpe_delay), dev_priv->rps.rpe_delay); - INIT_DELAYED_WORK(&dev_priv->rps.vlv_work, vlv_rps_timer_work); - valleyview_set_rps(dev_priv->dev, dev_priv->rps.rpe_delay); gen6_enable_rps_interrupts(dev); @@ -4955,6 +4953,11 @@ static void haswell_init_clock_gating(struct drm_device *dev) I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE); + /* L3 caching of data atomics doesn't work -- disable it. */ + I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); + I915_WRITE(HSW_ROW_CHICKEN3, + _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE)); + /* This is required by WaCatErrorRejectionIssue:hsw */ I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | @@ -5681,5 +5684,7 @@ void intel_pm_init(struct drm_device *dev) INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work, intel_gen6_powersave_work); + + INIT_DELAYED_WORK(&dev_priv->rps.vlv_work, vlv_rps_timer_work); } diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index f2c6d7909ae2..dd6f84bf6c22 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -916,6 +916,14 @@ intel_tv_compute_config(struct intel_encoder *encoder, DRM_DEBUG_KMS("forcing bpc to 8 for TV\n"); pipe_config->pipe_bpp = 8*3; + /* TV has it's own notion of sync and other mode flags, so clear them. */ + pipe_config->adjusted_mode.flags = 0; + + /* + * FIXME: We don't check whether the input mode is actually what we want + * or whether userspace is doing something stupid. + */ + return true; } diff --git a/drivers/gpu/drm/msm/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp4/mdp4_kms.c index 5db5bbaedae2..bc7fd11ad8be 100644 --- a/drivers/gpu/drm/msm/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/mdp4/mdp4_kms.c @@ -19,8 +19,6 @@ #include "msm_drv.h" #include "mdp4_kms.h" -#include <mach/iommu.h> - static struct mdp4_platform_config *mdp4_get_config(struct platform_device *dev); static int mdp4_hw_init(struct msm_kms *kms) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 008d772384c7..b3a2f1629041 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -18,8 +18,6 @@ #include "msm_drv.h" #include "msm_gpu.h" -#include <mach/iommu.h> - static void msm_fb_output_poll_changed(struct drm_device *dev) { struct msm_drm_private *priv = dev->dev_private; @@ -62,6 +60,8 @@ int msm_iommu_attach(struct drm_device *dev, struct iommu_domain *iommu, int i, ret; for (i = 0; i < cnt; i++) { + /* TODO maybe some day msm iommu won't require this hack: */ + struct device *msm_iommu_get_ctx(const char *ctx_name); struct device *ctx = msm_iommu_get_ctx(names[i]); if (!ctx) continue; @@ -199,7 +199,7 @@ static int msm_load(struct drm_device *dev, unsigned long flags) * imx drm driver on iMX5 */ dev_err(dev->dev, "failed to load kms\n"); - ret = PTR_ERR(priv->kms); + ret = PTR_ERR(kms); goto fail; } @@ -697,7 +697,7 @@ static struct drm_driver msm_driver = { .gem_vm_ops = &vm_ops, .dumb_create = msm_gem_dumb_create, .dumb_map_offset = msm_gem_dumb_map_offset, - .dumb_destroy = msm_gem_dumb_destroy, + .dumb_destroy = drm_gem_dumb_destroy, #ifdef CONFIG_DEBUG_FS .debugfs_init = msm_debugfs_init, .debugfs_cleanup = msm_debugfs_cleanup, diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 29eacfa29cfb..2bae46c66a30 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -319,13 +319,6 @@ int msm_gem_dumb_create(struct drm_file *file, struct drm_device *dev, MSM_BO_SCANOUT | MSM_BO_WC, &args->handle); } -int msm_gem_dumb_destroy(struct drm_file *file, struct drm_device *dev, - uint32_t handle) -{ - /* No special work needed, drop the reference and see what falls out */ - return drm_gem_handle_delete(file, handle); -} - int msm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev, uint32_t handle, uint64_t *offset) { diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/base.c b/drivers/gpu/drm/nouveau/core/subdev/mc/base.c index 37712a6df923..e290cfa4acee 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/mc/base.c +++ b/drivers/gpu/drm/nouveau/core/subdev/mc/base.c @@ -113,7 +113,7 @@ nouveau_mc_create_(struct nouveau_object *parent, struct nouveau_object *engine, pmc->use_msi = false; break; default: - pmc->use_msi = nouveau_boolopt(device->cfgopt, "NvMSI", true); + pmc->use_msi = nouveau_boolopt(device->cfgopt, "NvMSI", false); if (pmc->use_msi) { pmc->use_msi = pci_enable_msi(device->pdev) == 0; if (pmc->use_msi) { diff --git a/drivers/gpu/drm/radeon/btc_dpm.c b/drivers/gpu/drm/radeon/btc_dpm.c index 05ff315e8e9e..9b6950d9b3c0 100644 --- a/drivers/gpu/drm/radeon/btc_dpm.c +++ b/drivers/gpu/drm/radeon/btc_dpm.c @@ -1168,6 +1168,23 @@ static const struct radeon_blacklist_clocks btc_blacklist_clocks[] = { 25000, 30000, RADEON_SCLK_UP } }; +void btc_get_max_clock_from_voltage_dependency_table(struct radeon_clock_voltage_dependency_table *table, + u32 *max_clock) +{ + u32 i, clock = 0; + + if ((table == NULL) || (table->count == 0)) { + *max_clock = clock; + return; + } + + for (i = 0; i < table->count; i++) { + if (clock < table->entries[i].clk) + clock = table->entries[i].clk; + } + *max_clock = clock; +} + void btc_apply_voltage_dependency_rules(struct radeon_clock_voltage_dependency_table *table, u32 clock, u16 max_voltage, u16 *voltage) { @@ -1913,7 +1930,7 @@ static int btc_set_mc_special_registers(struct radeon_device *rdev, } j++; - if (j > SMC_EVERGREEN_MC_REGISTER_ARRAY_SIZE) + if (j >= SMC_EVERGREEN_MC_REGISTER_ARRAY_SIZE) return -EINVAL; tmp = RREG32(MC_PMG_CMD_MRS); @@ -1928,7 +1945,7 @@ static int btc_set_mc_special_registers(struct radeon_device *rdev, } j++; - if (j > SMC_EVERGREEN_MC_REGISTER_ARRAY_SIZE) + if (j >= SMC_EVERGREEN_MC_REGISTER_ARRAY_SIZE) return -EINVAL; break; case MC_SEQ_RESERVE_M >> 2: @@ -1942,7 +1959,7 @@ static int btc_set_mc_special_registers(struct radeon_device *rdev, } j++; - if (j > SMC_EVERGREEN_MC_REGISTER_ARRAY_SIZE) + if (j >= SMC_EVERGREEN_MC_REGISTER_ARRAY_SIZE) return -EINVAL; break; default: @@ -2080,6 +2097,7 @@ static void btc_apply_state_adjust_rules(struct radeon_device *rdev, bool disable_mclk_switching; u32 mclk, sclk; u16 vddc, vddci; + u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc; if ((rdev->pm.dpm.new_active_crtc_count > 1) || btc_dpm_vblank_too_short(rdev)) @@ -2121,6 +2139,39 @@ static void btc_apply_state_adjust_rules(struct radeon_device *rdev, ps->low.vddci = max_limits->vddci; } + /* limit clocks to max supported clocks based on voltage dependency tables */ + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk, + &max_sclk_vddc); + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk, + &max_mclk_vddci); + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk, + &max_mclk_vddc); + + if (max_sclk_vddc) { + if (ps->low.sclk > max_sclk_vddc) + ps->low.sclk = max_sclk_vddc; + if (ps->medium.sclk > max_sclk_vddc) + ps->medium.sclk = max_sclk_vddc; + if (ps->high.sclk > max_sclk_vddc) + ps->high.sclk = max_sclk_vddc; + } + if (max_mclk_vddci) { + if (ps->low.mclk > max_mclk_vddci) + ps->low.mclk = max_mclk_vddci; + if (ps->medium.mclk > max_mclk_vddci) + ps->medium.mclk = max_mclk_vddci; + if (ps->high.mclk > max_mclk_vddci) + ps->high.mclk = max_mclk_vddci; + } + if (max_mclk_vddc) { + if (ps->low.mclk > max_mclk_vddc) + ps->low.mclk = max_mclk_vddc; + if (ps->medium.mclk > max_mclk_vddc) + ps->medium.mclk = max_mclk_vddc; + if (ps->high.mclk > max_mclk_vddc) + ps->high.mclk = max_mclk_vddc; + } + /* XXX validate the min clocks required for display */ if (disable_mclk_switching) { diff --git a/drivers/gpu/drm/radeon/btc_dpm.h b/drivers/gpu/drm/radeon/btc_dpm.h index 1a15e0e41950..3b6f12b7760b 100644 --- a/drivers/gpu/drm/radeon/btc_dpm.h +++ b/drivers/gpu/drm/radeon/btc_dpm.h @@ -46,6 +46,8 @@ void btc_adjust_clock_combinations(struct radeon_device *rdev, struct rv7xx_pl *pl); void btc_apply_voltage_dependency_rules(struct radeon_clock_voltage_dependency_table *table, u32 clock, u16 max_voltage, u16 *voltage); +void btc_get_max_clock_from_voltage_dependency_table(struct radeon_clock_voltage_dependency_table *table, + u32 *max_clock); void btc_apply_voltage_delta_rules(struct radeon_device *rdev, u16 max_vddc, u16 max_vddci, u16 *vddc, u16 *vddci); diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 899627443030..51e947a97edf 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -146,6 +146,8 @@ static const struct ci_pt_config_reg didt_config_ci[] = }; extern u8 rv770_get_memory_module_index(struct radeon_device *rdev); +extern void btc_get_max_clock_from_voltage_dependency_table(struct radeon_clock_voltage_dependency_table *table, + u32 *max_clock); extern int ni_copy_and_switch_arb_sets(struct radeon_device *rdev, u32 arb_freq_src, u32 arb_freq_dest); extern u8 si_get_ddr3_mclk_frequency_ratio(u32 memory_clock); @@ -712,6 +714,7 @@ static void ci_apply_state_adjust_rules(struct radeon_device *rdev, struct radeon_clock_and_voltage_limits *max_limits; bool disable_mclk_switching; u32 sclk, mclk; + u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc; int i; if ((rdev->pm.dpm.new_active_crtc_count > 1) || @@ -739,6 +742,29 @@ static void ci_apply_state_adjust_rules(struct radeon_device *rdev, } } + /* limit clocks to max supported clocks based on voltage dependency tables */ + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk, + &max_sclk_vddc); + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk, + &max_mclk_vddci); + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk, + &max_mclk_vddc); + + for (i = 0; i < ps->performance_level_count; i++) { + if (max_sclk_vddc) { + if (ps->performance_levels[i].sclk > max_sclk_vddc) + ps->performance_levels[i].sclk = max_sclk_vddc; + } + if (max_mclk_vddci) { + if (ps->performance_levels[i].mclk > max_mclk_vddci) + ps->performance_levels[i].mclk = max_mclk_vddci; + } + if (max_mclk_vddc) { + if (ps->performance_levels[i].mclk > max_mclk_vddc) + ps->performance_levels[i].mclk = max_mclk_vddc; + } + } + /* XXX validate the min clocks required for display */ if (disable_mclk_switching) { diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index adbdb6503b05..b874ccdf52f7 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -77,6 +77,8 @@ static void cik_pcie_gen3_enable(struct radeon_device *rdev); static void cik_program_aspm(struct radeon_device *rdev); static void cik_init_pg(struct radeon_device *rdev); static void cik_init_cg(struct radeon_device *rdev); +static void cik_fini_pg(struct radeon_device *rdev); +static void cik_fini_cg(struct radeon_device *rdev); static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev, bool enable); @@ -2845,10 +2847,8 @@ static void cik_gpu_init(struct radeon_device *rdev) rdev->config.cik.tile_config |= (3 << 0); break; } - if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) - rdev->config.cik.tile_config |= 1 << 4; - else - rdev->config.cik.tile_config |= 0 << 4; + rdev->config.cik.tile_config |= + ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4; rdev->config.cik.tile_config |= ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8; rdev->config.cik.tile_config |= @@ -4187,6 +4187,10 @@ static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask) dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS)); + /* disable CG/PG */ + cik_fini_pg(rdev); + cik_fini_cg(rdev); + /* stop the rlc */ cik_rlc_stop(rdev); @@ -4456,8 +4460,8 @@ static int cik_mc_init(struct radeon_device *rdev) rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0); rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0); /* size in MB on si */ - rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024; - rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024; + rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL; + rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL; rdev->mc.visible_vram_size = rdev->mc.aper_size; si_vram_gtt_location(rdev, &rdev->mc); radeon_update_bandwidth_info(rdev); @@ -4735,12 +4739,13 @@ static void cik_vm_decode_fault(struct radeon_device *rdev, u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT; u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT; u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT; - char *block = (char *)&mc_client; + char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff, + (mc_client >> 8) & 0xff, mc_client & 0xff, 0 }; - printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n", + printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n", protections, vmid, addr, (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read", - block, mc_id); + block, mc_client, mc_id); } /** diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 555164e270a7..b5c67a99dda9 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -3131,7 +3131,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev) rdev->config.evergreen.sx_max_export_size = 256; rdev->config.evergreen.sx_max_export_pos_size = 64; rdev->config.evergreen.sx_max_export_smx_size = 192; - rdev->config.evergreen.max_hw_contexts = 8; + rdev->config.evergreen.max_hw_contexts = 4; rdev->config.evergreen.sq_num_cf_insts = 2; rdev->config.evergreen.sc_prim_fifo_size = 0x40; diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c index f71ce390aebe..f815c20640bd 100644 --- a/drivers/gpu/drm/radeon/evergreen_hdmi.c +++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c @@ -288,8 +288,7 @@ void evergreen_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode /* fglrx clears sth in AFMT_AUDIO_PACKET_CONTROL2 here */ WREG32(HDMI_ACR_PACKET_CONTROL + offset, - HDMI_ACR_AUTO_SEND | /* allow hw to sent ACR packets when required */ - HDMI_ACR_SOURCE); /* select SW CTS value */ + HDMI_ACR_AUTO_SEND); /* allow hw to sent ACR packets when required */ evergreen_hdmi_update_ACR(encoder, mode->clock); diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index 8768fd6a1e27..4f6d2962767d 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -1501,7 +1501,7 @@ * 6. COMMAND [29:22] | BYTE_COUNT [20:0] */ # define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20) - /* 0 - SRC_ADDR + /* 0 - DST_ADDR * 1 - GDS */ # define PACKET3_CP_DMA_ENGINE(x) ((x) << 27) @@ -1516,7 +1516,7 @@ # define PACKET3_CP_DMA_CP_SYNC (1 << 31) /* COMMAND */ # define PACKET3_CP_DMA_DIS_WC (1 << 21) -# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23) +# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22) /* 0 - none * 1 - 8 in 16 * 2 - 8 in 32 diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c index 6c398a456d78..f26339028154 100644 --- a/drivers/gpu/drm/radeon/ni_dpm.c +++ b/drivers/gpu/drm/radeon/ni_dpm.c @@ -787,6 +787,7 @@ static void ni_apply_state_adjust_rules(struct radeon_device *rdev, bool disable_mclk_switching; u32 mclk, sclk; u16 vddc, vddci; + u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc; int i; if ((rdev->pm.dpm.new_active_crtc_count > 1) || @@ -813,6 +814,29 @@ static void ni_apply_state_adjust_rules(struct radeon_device *rdev, } } + /* limit clocks to max supported clocks based on voltage dependency tables */ + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk, + &max_sclk_vddc); + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk, + &max_mclk_vddci); + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk, + &max_mclk_vddc); + + for (i = 0; i < ps->performance_level_count; i++) { + if (max_sclk_vddc) { + if (ps->performance_levels[i].sclk > max_sclk_vddc) + ps->performance_levels[i].sclk = max_sclk_vddc; + } + if (max_mclk_vddci) { + if (ps->performance_levels[i].mclk > max_mclk_vddci) + ps->performance_levels[i].mclk = max_mclk_vddci; + } + if (max_mclk_vddc) { + if (ps->performance_levels[i].mclk > max_mclk_vddc) + ps->performance_levels[i].mclk = max_mclk_vddc; + } + } + /* XXX validate the min clocks required for display */ if (disable_mclk_switching) { diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 24175717307b..d71333033b2b 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -2933,9 +2933,11 @@ static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data) seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp); seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw); seq_printf(m, "%u dwords in ring\n", count); - for (j = 0; j <= count; j++) { - i = (rdp + j) & ring->ptr_mask; - seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]); + if (ring->ready) { + for (j = 0; j <= count; j++) { + i = (rdp + j) & ring->ptr_mask; + seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]); + } } return 0; } diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c index e65f211a7be0..5513d8f06252 100644 --- a/drivers/gpu/drm/radeon/r600_dpm.c +++ b/drivers/gpu/drm/radeon/r600_dpm.c @@ -1084,7 +1084,7 @@ int r600_parse_extended_power_table(struct radeon_device *rdev) rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[i].dclk = le16_to_cpu(uvd_clk->usDClkLow) | (uvd_clk->ucDClkHigh << 16); rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[i].v = - le16_to_cpu(limits->entries[i].usVoltage); + le16_to_cpu(entry->usVoltage); entry = (ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record *) ((u8 *)entry + sizeof(ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record)); } diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index f443010ce90b..5b729319f27b 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -57,15 +57,15 @@ enum r600_hdmi_iec_status_bits { static const struct radeon_hdmi_acr r600_hdmi_predefined_acr[] = { /* 32kHz 44.1kHz 48kHz */ /* Clock N CTS N CTS N CTS */ - { 25174, 4576, 28125, 7007, 31250, 6864, 28125 }, /* 25,20/1.001 MHz */ + { 25175, 4576, 28125, 7007, 31250, 6864, 28125 }, /* 25,20/1.001 MHz */ { 25200, 4096, 25200, 6272, 28000, 6144, 25200 }, /* 25.20 MHz */ { 27000, 4096, 27000, 6272, 30000, 6144, 27000 }, /* 27.00 MHz */ { 27027, 4096, 27027, 6272, 30030, 6144, 27027 }, /* 27.00*1.001 MHz */ { 54000, 4096, 54000, 6272, 60000, 6144, 54000 }, /* 54.00 MHz */ { 54054, 4096, 54054, 6272, 60060, 6144, 54054 }, /* 54.00*1.001 MHz */ - { 74175, 11648, 210937, 17836, 234375, 11648, 140625 }, /* 74.25/1.001 MHz */ + { 74176, 11648, 210937, 17836, 234375, 11648, 140625 }, /* 74.25/1.001 MHz */ { 74250, 4096, 74250, 6272, 82500, 6144, 74250 }, /* 74.25 MHz */ - { 148351, 11648, 421875, 8918, 234375, 5824, 140625 }, /* 148.50/1.001 MHz */ + { 148352, 11648, 421875, 8918, 234375, 5824, 140625 }, /* 148.50/1.001 MHz */ { 148500, 4096, 148500, 6272, 165000, 6144, 148500 }, /* 148.50 MHz */ { 0, 4096, 0, 6272, 0, 6144, 0 } /* Other */ }; @@ -75,8 +75,15 @@ static const struct radeon_hdmi_acr r600_hdmi_predefined_acr[] = { */ static void r600_hdmi_calc_cts(uint32_t clock, int *CTS, int N, int freq) { - if (*CTS == 0) - *CTS = clock * N / (128 * freq) * 1000; + u64 n; + u32 d; + + if (*CTS == 0) { + n = (u64)clock * (u64)N * 1000ULL; + d = 128 * freq; + do_div(n, d); + *CTS = n; + } DRM_DEBUG("Using ACR timing N=%d CTS=%d for frequency %d\n", N, *CTS, freq); } @@ -257,10 +264,7 @@ void r600_audio_set_dto(struct drm_encoder *encoder, u32 clock) * number (coefficient of two integer numbers. DCCG_AUDIO_DTOx_PHASE * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator */ - if (ASIC_IS_DCE3(rdev)) { - /* according to the reg specs, this should DCE3.2 only, but in - * practice it seems to cover DCE3.0 as well. - */ + if (ASIC_IS_DCE32(rdev)) { if (dig->dig_encoder == 0) { dto_cntl = RREG32(DCCG_AUDIO_DTO0_CNTL) & ~DCCG_AUDIO_DTO_WALLCLOCK_RATIO_MASK; dto_cntl |= DCCG_AUDIO_DTO_WALLCLOCK_RATIO(wallclock_ratio); @@ -276,8 +280,21 @@ void r600_audio_set_dto(struct drm_encoder *encoder, u32 clock) WREG32(DCCG_AUDIO_DTO1_MODULE, dto_modulo); WREG32(DCCG_AUDIO_DTO_SELECT, 1); /* select DTO1 */ } + } else if (ASIC_IS_DCE3(rdev)) { + /* according to the reg specs, this should DCE3.2 only, but in + * practice it seems to cover DCE3.0/3.1 as well. + */ + if (dig->dig_encoder == 0) { + WREG32(DCCG_AUDIO_DTO0_PHASE, base_rate * 100); + WREG32(DCCG_AUDIO_DTO0_MODULE, clock * 100); + WREG32(DCCG_AUDIO_DTO_SELECT, 0); /* select DTO0 */ + } else { + WREG32(DCCG_AUDIO_DTO1_PHASE, base_rate * 100); + WREG32(DCCG_AUDIO_DTO1_MODULE, clock * 100); + WREG32(DCCG_AUDIO_DTO_SELECT, 1); /* select DTO1 */ + } } else { - /* according to the reg specs, this should be DCE2.0 and DCE3.0 */ + /* according to the reg specs, this should be DCE2.0 and DCE3.0/3.1 */ WREG32(AUDIO_DTO, AUDIO_DTO_PHASE(base_rate / 10) | AUDIO_DTO_MODULE(clock / 10)); } @@ -434,8 +451,8 @@ void r600_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mod } WREG32(HDMI0_ACR_PACKET_CONTROL + offset, - HDMI0_ACR_AUTO_SEND | /* allow hw to sent ACR packets when required */ - HDMI0_ACR_SOURCE); /* select SW CTS value */ + HDMI0_ACR_SOURCE | /* select SW CTS value - XXX verify that hw CTS works on all families */ + HDMI0_ACR_AUTO_SEND); /* allow hw to sent ACR packets when required */ WREG32(HDMI0_VBI_PACKET_CONTROL + offset, HDMI0_NULL_SEND | /* send null packets when required */ diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index e673fe26ea84..7b3c7b5932c5 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -1523,7 +1523,7 @@ */ # define PACKET3_CP_DMA_CP_SYNC (1 << 31) /* COMMAND */ -# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23) +# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22) /* 0 - none * 1 - 8 in 16 * 2 - 8 in 32 diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 5003385a7512..8f7e04538fd6 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1004,6 +1004,8 @@ static struct radeon_asic rv6xx_asic = { .wait_for_vblank = &avivo_wait_for_vblank, .set_backlight_level = &atombios_set_backlight_level, .get_backlight_level = &atombios_get_backlight_level, + .hdmi_enable = &r600_hdmi_enable, + .hdmi_setmode = &r600_hdmi_setmode, }, .copy = { .blit = &r600_copy_cpdma, diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 404e25d285ba..f79ee184ffd5 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -1367,6 +1367,7 @@ bool radeon_atombios_get_ppll_ss_info(struct radeon_device *rdev, int index = GetIndexIntoMasterTable(DATA, PPLL_SS_Info); uint16_t data_offset, size; struct _ATOM_SPREAD_SPECTRUM_INFO *ss_info; + struct _ATOM_SPREAD_SPECTRUM_ASSIGNMENT *ss_assign; uint8_t frev, crev; int i, num_indices; @@ -1378,18 +1379,21 @@ bool radeon_atombios_get_ppll_ss_info(struct radeon_device *rdev, num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) / sizeof(ATOM_SPREAD_SPECTRUM_ASSIGNMENT); - + ss_assign = (struct _ATOM_SPREAD_SPECTRUM_ASSIGNMENT*) + ((u8 *)&ss_info->asSS_Info[0]); for (i = 0; i < num_indices; i++) { - if (ss_info->asSS_Info[i].ucSS_Id == id) { + if (ss_assign->ucSS_Id == id) { ss->percentage = - le16_to_cpu(ss_info->asSS_Info[i].usSpreadSpectrumPercentage); - ss->type = ss_info->asSS_Info[i].ucSpreadSpectrumType; - ss->step = ss_info->asSS_Info[i].ucSS_Step; - ss->delay = ss_info->asSS_Info[i].ucSS_Delay; - ss->range = ss_info->asSS_Info[i].ucSS_Range; - ss->refdiv = ss_info->asSS_Info[i].ucRecommendedRef_Div; + le16_to_cpu(ss_assign->usSpreadSpectrumPercentage); + ss->type = ss_assign->ucSpreadSpectrumType; + ss->step = ss_assign->ucSS_Step; + ss->delay = ss_assign->ucSS_Delay; + ss->range = ss_assign->ucSS_Range; + ss->refdiv = ss_assign->ucRecommendedRef_Div; return true; } + ss_assign = (struct _ATOM_SPREAD_SPECTRUM_ASSIGNMENT*) + ((u8 *)ss_assign + sizeof(struct _ATOM_SPREAD_SPECTRUM_ASSIGNMENT)); } } return false; @@ -1477,6 +1481,12 @@ union asic_ss_info { struct _ATOM_ASIC_INTERNAL_SS_INFO_V3 info_3; }; +union asic_ss_assignment { + struct _ATOM_ASIC_SS_ASSIGNMENT v1; + struct _ATOM_ASIC_SS_ASSIGNMENT_V2 v2; + struct _ATOM_ASIC_SS_ASSIGNMENT_V3 v3; +}; + bool radeon_atombios_get_asic_ss_info(struct radeon_device *rdev, struct radeon_atom_ss *ss, int id, u32 clock) @@ -1485,6 +1495,7 @@ bool radeon_atombios_get_asic_ss_info(struct radeon_device *rdev, int index = GetIndexIntoMasterTable(DATA, ASIC_InternalSS_Info); uint16_t data_offset, size; union asic_ss_info *ss_info; + union asic_ss_assignment *ss_assign; uint8_t frev, crev; int i, num_indices; @@ -1509,45 +1520,52 @@ bool radeon_atombios_get_asic_ss_info(struct radeon_device *rdev, num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) / sizeof(ATOM_ASIC_SS_ASSIGNMENT); + ss_assign = (union asic_ss_assignment *)((u8 *)&ss_info->info.asSpreadSpectrum[0]); for (i = 0; i < num_indices; i++) { - if ((ss_info->info.asSpreadSpectrum[i].ucClockIndication == id) && - (clock <= le32_to_cpu(ss_info->info.asSpreadSpectrum[i].ulTargetClockRange))) { + if ((ss_assign->v1.ucClockIndication == id) && + (clock <= le32_to_cpu(ss_assign->v1.ulTargetClockRange))) { ss->percentage = - le16_to_cpu(ss_info->info.asSpreadSpectrum[i].usSpreadSpectrumPercentage); - ss->type = ss_info->info.asSpreadSpectrum[i].ucSpreadSpectrumMode; - ss->rate = le16_to_cpu(ss_info->info.asSpreadSpectrum[i].usSpreadRateInKhz); + le16_to_cpu(ss_assign->v1.usSpreadSpectrumPercentage); + ss->type = ss_assign->v1.ucSpreadSpectrumMode; + ss->rate = le16_to_cpu(ss_assign->v1.usSpreadRateInKhz); return true; } + ss_assign = (union asic_ss_assignment *) + ((u8 *)ss_assign + sizeof(ATOM_ASIC_SS_ASSIGNMENT)); } break; case 2: num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) / sizeof(ATOM_ASIC_SS_ASSIGNMENT_V2); + ss_assign = (union asic_ss_assignment *)((u8 *)&ss_info->info_2.asSpreadSpectrum[0]); for (i = 0; i < num_indices; i++) { - if ((ss_info->info_2.asSpreadSpectrum[i].ucClockIndication == id) && - (clock <= le32_to_cpu(ss_info->info_2.asSpreadSpectrum[i].ulTargetClockRange))) { + if ((ss_assign->v2.ucClockIndication == id) && + (clock <= le32_to_cpu(ss_assign->v2.ulTargetClockRange))) { ss->percentage = - le16_to_cpu(ss_info->info_2.asSpreadSpectrum[i].usSpreadSpectrumPercentage); - ss->type = ss_info->info_2.asSpreadSpectrum[i].ucSpreadSpectrumMode; - ss->rate = le16_to_cpu(ss_info->info_2.asSpreadSpectrum[i].usSpreadRateIn10Hz); + le16_to_cpu(ss_assign->v2.usSpreadSpectrumPercentage); + ss->type = ss_assign->v2.ucSpreadSpectrumMode; + ss->rate = le16_to_cpu(ss_assign->v2.usSpreadRateIn10Hz); if ((crev == 2) && ((id == ASIC_INTERNAL_ENGINE_SS) || (id == ASIC_INTERNAL_MEMORY_SS))) ss->rate /= 100; return true; } + ss_assign = (union asic_ss_assignment *) + ((u8 *)ss_assign + sizeof(ATOM_ASIC_SS_ASSIGNMENT_V2)); } break; case 3: num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) / sizeof(ATOM_ASIC_SS_ASSIGNMENT_V3); + ss_assign = (union asic_ss_assignment *)((u8 *)&ss_info->info_3.asSpreadSpectrum[0]); for (i = 0; i < num_indices; i++) { - if ((ss_info->info_3.asSpreadSpectrum[i].ucClockIndication == id) && - (clock <= le32_to_cpu(ss_info->info_3.asSpreadSpectrum[i].ulTargetClockRange))) { + if ((ss_assign->v3.ucClockIndication == id) && + (clock <= le32_to_cpu(ss_assign->v3.ulTargetClockRange))) { ss->percentage = - le16_to_cpu(ss_info->info_3.asSpreadSpectrum[i].usSpreadSpectrumPercentage); - ss->type = ss_info->info_3.asSpreadSpectrum[i].ucSpreadSpectrumMode; - ss->rate = le16_to_cpu(ss_info->info_3.asSpreadSpectrum[i].usSpreadRateIn10Hz); + le16_to_cpu(ss_assign->v3.usSpreadSpectrumPercentage); + ss->type = ss_assign->v3.ucSpreadSpectrumMode; + ss->rate = le16_to_cpu(ss_assign->v3.usSpreadRateIn10Hz); if ((id == ASIC_INTERNAL_ENGINE_SS) || (id == ASIC_INTERNAL_MEMORY_SS)) ss->rate /= 100; @@ -1555,6 +1573,8 @@ bool radeon_atombios_get_asic_ss_info(struct radeon_device *rdev, radeon_atombios_get_igp_ss_overrides(rdev, ss, id); return true; } + ss_assign = (union asic_ss_assignment *) + ((u8 *)ss_assign + sizeof(ATOM_ASIC_SS_ASSIGNMENT_V3)); } break; default: diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index ac6ece61a476..66c222836631 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -85,8 +85,9 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) VRAM, also but everything into VRAM on AGP cards to avoid image corruptions */ if (p->ring == R600_RING_TYPE_UVD_INDEX && - (i == 0 || p->rdev->flags & RADEON_IS_AGP)) { - /* TODO: is this still needed for NI+ ? */ + p->rdev->family < CHIP_PALM && + (i == 0 || drm_pci_device_is_agp(p->rdev->ddev))) { + p->relocs[i].lobj.domain = RADEON_GEM_DOMAIN_VRAM; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index e29faa73b574..841d0e09be3e 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1320,13 +1320,22 @@ int radeon_device_init(struct radeon_device *rdev, return r; } if ((radeon_testing & 1)) { - radeon_test_moves(rdev); + if (rdev->accel_working) + radeon_test_moves(rdev); + else + DRM_INFO("radeon: acceleration disabled, skipping move tests\n"); } if ((radeon_testing & 2)) { - radeon_test_syncing(rdev); + if (rdev->accel_working) + radeon_test_syncing(rdev); + else + DRM_INFO("radeon: acceleration disabled, skipping sync tests\n"); } if (radeon_benchmarking) { - radeon_benchmark(rdev, radeon_benchmarking); + if (rdev->accel_working) + radeon_benchmark(rdev, radeon_benchmarking); + else + DRM_INFO("radeon: acceleration disabled, skipping benchmarks\n"); } return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 87e1d69e8fdb..4f6b7fc7ad3c 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -945,6 +945,8 @@ void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable) if (enable) { mutex_lock(&rdev->pm.mutex); rdev->pm.dpm.uvd_active = true; + /* disable this for now */ +#if 0 if ((rdev->pm.dpm.sd == 1) && (rdev->pm.dpm.hd == 0)) dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_SD; else if ((rdev->pm.dpm.sd == 2) && (rdev->pm.dpm.hd == 0)) @@ -954,6 +956,7 @@ void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable) else if ((rdev->pm.dpm.sd == 0) && (rdev->pm.dpm.hd == 2)) dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_HD2; else +#endif dpm_state = POWER_STATE_TYPE_INTERNAL_UVD; rdev->pm.dpm.state = dpm_state; mutex_unlock(&rdev->pm.mutex); @@ -1002,7 +1005,7 @@ static void radeon_pm_resume_old(struct radeon_device *rdev) { /* set up the default clocks if the MC ucode is loaded */ if ((rdev->family >= CHIP_BARTS) && - (rdev->family <= CHIP_HAINAN) && + (rdev->family <= CHIP_CAYMAN) && rdev->mc_fw) { if (rdev->pm.default_vddc) radeon_atom_set_voltage(rdev, rdev->pm.default_vddc, @@ -1046,7 +1049,7 @@ static void radeon_pm_resume_dpm(struct radeon_device *rdev) if (ret) { DRM_ERROR("radeon: dpm resume failed\n"); if ((rdev->family >= CHIP_BARTS) && - (rdev->family <= CHIP_HAINAN) && + (rdev->family <= CHIP_CAYMAN) && rdev->mc_fw) { if (rdev->pm.default_vddc) radeon_atom_set_voltage(rdev, rdev->pm.default_vddc, @@ -1097,7 +1100,7 @@ static int radeon_pm_init_old(struct radeon_device *rdev) radeon_pm_init_profile(rdev); /* set up the default clocks if the MC ucode is loaded */ if ((rdev->family >= CHIP_BARTS) && - (rdev->family <= CHIP_HAINAN) && + (rdev->family <= CHIP_CAYMAN) && rdev->mc_fw) { if (rdev->pm.default_vddc) radeon_atom_set_voltage(rdev, rdev->pm.default_vddc, @@ -1183,7 +1186,7 @@ static int radeon_pm_init_dpm(struct radeon_device *rdev) if (ret) { rdev->pm.dpm_enabled = false; if ((rdev->family >= CHIP_BARTS) && - (rdev->family <= CHIP_HAINAN) && + (rdev->family <= CHIP_CAYMAN) && rdev->mc_fw) { if (rdev->pm.default_vddc) radeon_atom_set_voltage(rdev, rdev->pm.default_vddc, diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 46a25f037b84..18254e1c3e71 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -839,9 +839,11 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data) * packet that is the root issue */ i = (ring->rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask; - for (j = 0; j <= (count + 32); j++) { - seq_printf(m, "r[%5d]=0x%08x\n", i, ring->ring[i]); - i = (i + 1) & ring->ptr_mask; + if (ring->ready) { + for (j = 0; j <= (count + 32); j++) { + seq_printf(m, "r[%5d]=0x%08x\n", i, ring->ring[i]); + i = (i + 1) & ring->ptr_mask; + } } return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index f4d6bcee9006..12e8099a0823 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -36,8 +36,8 @@ static void radeon_do_test_moves(struct radeon_device *rdev, int flag) struct radeon_bo *vram_obj = NULL; struct radeon_bo **gtt_obj = NULL; uint64_t gtt_addr, vram_addr; - unsigned i, n, size; - int r, ring; + unsigned n, size; + int i, r, ring; switch (flag) { case RADEON_TEST_COPY_DMA: diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 1a01bbff9bfa..4f2e73f79638 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -476,8 +476,7 @@ static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p, return -EINVAL; } - /* TODO: is this still necessary on NI+ ? */ - if ((cmd == 0 || cmd == 0x3) && + if (p->rdev->family < CHIP_PALM && (cmd == 0 || cmd == 0x3) && (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) { DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n", start, end); @@ -799,7 +798,8 @@ void radeon_uvd_note_usage(struct radeon_device *rdev) (rdev->pm.dpm.hd != hd)) { rdev->pm.dpm.sd = sd; rdev->pm.dpm.hd = hd; - streams_changed = true; + /* disable this for now */ + /*streams_changed = true;*/ } } diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index c354c1094967..d4652af425b8 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -85,6 +85,9 @@ extern void si_dma_vm_set_page(struct radeon_device *rdev, uint32_t incr, uint32_t flags); static void si_enable_gui_idle_interrupt(struct radeon_device *rdev, bool enable); +static void si_fini_pg(struct radeon_device *rdev); +static void si_fini_cg(struct radeon_device *rdev); +static void si_rlc_stop(struct radeon_device *rdev); static const u32 verde_rlc_save_restore_register_list[] = { @@ -3608,6 +3611,13 @@ static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask) dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS)); + /* disable PG/CG */ + si_fini_pg(rdev); + si_fini_cg(rdev); + + /* stop the rlc */ + si_rlc_stop(rdev); + /* Disable CP parsing/prefetching */ WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT); diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index cfe5d4d28915..2332aa1bf93c 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2910,6 +2910,7 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, bool disable_sclk_switching = false; u32 mclk, sclk; u16 vddc, vddci; + u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc; int i; if ((rdev->pm.dpm.new_active_crtc_count > 1) || @@ -2943,6 +2944,29 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, } } + /* limit clocks to max supported clocks based on voltage dependency tables */ + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk, + &max_sclk_vddc); + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk, + &max_mclk_vddci); + btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk, + &max_mclk_vddc); + + for (i = 0; i < ps->performance_level_count; i++) { + if (max_sclk_vddc) { + if (ps->performance_levels[i].sclk > max_sclk_vddc) + ps->performance_levels[i].sclk = max_sclk_vddc; + } + if (max_mclk_vddci) { + if (ps->performance_levels[i].mclk > max_mclk_vddci) + ps->performance_levels[i].mclk = max_mclk_vddci; + } + if (max_mclk_vddc) { + if (ps->performance_levels[i].mclk > max_mclk_vddc) + ps->performance_levels[i].mclk = max_mclk_vddc; + } + } + /* XXX validate the min clocks required for display */ if (disable_mclk_switching) { @@ -5184,7 +5208,7 @@ static int si_set_mc_special_registers(struct radeon_device *rdev, table->mc_reg_table_entry[k].mc_data[j] |= 0x100; } j++; - if (j > SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE) + if (j >= SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE) return -EINVAL; if (!pi->mem_gddr5) { @@ -5194,7 +5218,7 @@ static int si_set_mc_special_registers(struct radeon_device *rdev, table->mc_reg_table_entry[k].mc_data[j] = (table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16; j++; - if (j > SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE) + if (j >= SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE) return -EINVAL; } break; @@ -5207,7 +5231,7 @@ static int si_set_mc_special_registers(struct radeon_device *rdev, (temp_reg & 0xffff0000) | (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); j++; - if (j > SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE) + if (j >= SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE) return -EINVAL; break; default: diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 52d2ab6b67a0..7e2e0ea66a00 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -1553,7 +1553,7 @@ * 6. COMMAND [30:21] | BYTE_COUNT [20:0] */ # define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20) - /* 0 - SRC_ADDR + /* 0 - DST_ADDR * 1 - GDS */ # define PACKET3_CP_DMA_ENGINE(x) ((x) << 27) @@ -1568,7 +1568,7 @@ # define PACKET3_CP_DMA_CP_SYNC (1 << 31) /* COMMAND */ # define PACKET3_CP_DMA_DIS_WC (1 << 21) -# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23) +# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22) /* 0 - none * 1 - 8 in 16 * 2 - 8 in 32 diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c index 7f998bf1cc9d..9364129ba292 100644 --- a/drivers/gpu/drm/radeon/trinity_dpm.c +++ b/drivers/gpu/drm/radeon/trinity_dpm.c @@ -1868,7 +1868,7 @@ int trinity_dpm_init(struct radeon_device *rdev) for (i = 0; i < SUMO_MAX_HARDWARE_POWERLEVELS; i++) pi->at[i] = TRINITY_AT_DFLT; - pi->enable_bapm = true; + pi->enable_bapm = false; pi->enable_nbps_policy = true; pi->enable_sclk_ds = true; pi->enable_gfx_power_gating = true; diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c index 7266805d9786..3100fa9cb52f 100644 --- a/drivers/gpu/drm/radeon/uvd_v1_0.c +++ b/drivers/gpu/drm/radeon/uvd_v1_0.c @@ -212,8 +212,8 @@ int uvd_v1_0_start(struct radeon_device *rdev) /* enable VCPU clock */ WREG32(UVD_VCPU_CNTL, 1 << 9); - /* enable UMC */ - WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8)); + /* enable UMC and NC0 */ + WREG32_P(UVD_LMI_CTRL2, 1 << 13, ~((1 << 8) | (1 << 13))); /* boot up the VCPU */ WREG32(UVD_SOFT_RESET, 0); diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 71b70e3a7a71..c91d547191dd 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -241,6 +241,7 @@ config HID_HOLTEK - Sharkoon Drakonia / Perixx MX-2000 gaming mice - Tracer Sniper TRM-503 / NOVA Gaming Slider X200 / Zalman ZM-GM1 + - SHARKOON DarkGlider Gaming mouse config HOLTEK_FF bool "Holtek On Line Grip force feedback support" diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index b8470b1a10fe..5a8c01112a23 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1715,6 +1715,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_KEYBOARD) }, { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A04A) }, { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A067) }, + { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) }, { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_580) }, { HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) }, diff --git a/drivers/hid/hid-holtek-mouse.c b/drivers/hid/hid-holtek-mouse.c index 7e6db3cf46f9..e696566cde46 100644 --- a/drivers/hid/hid-holtek-mouse.c +++ b/drivers/hid/hid-holtek-mouse.c @@ -27,6 +27,7 @@ * - USB ID 04d9:a067, sold as Sharkoon Drakonia and Perixx MX-2000 * - USB ID 04d9:a04a, sold as Tracer Sniper TRM-503, NOVA Gaming Slider X200 * and Zalman ZM-GM1 + * - USB ID 04d9:a081, sold as SHARKOON DarkGlider Gaming mouse */ static __u8 *holtek_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc, @@ -46,6 +47,7 @@ static __u8 *holtek_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc, } break; case USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A04A: + case USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081: if (*rsize >= 113 && rdesc[106] == 0xff && rdesc[107] == 0x7f && rdesc[111] == 0xff && rdesc[112] == 0x7f) { hid_info(hdev, "Fixing up report descriptor\n"); @@ -63,6 +65,8 @@ static const struct hid_device_id holtek_mouse_devices[] = { USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A067) }, { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A04A) }, + { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, + USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) }, { } }; MODULE_DEVICE_TABLE(hid, holtek_mouse_devices); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index e60e8d530697..9cbc7ab07dfa 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -450,6 +450,7 @@ #define USB_DEVICE_ID_HOLTEK_ALT_KEYBOARD 0xa055 #define USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A067 0xa067 #define USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A04A 0xa04a +#define USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081 0xa081 #define USB_VENDOR_ID_IMATION 0x0718 #define USB_DEVICE_ID_DISC_STAKKA 0xd000 diff --git a/drivers/hid/hid-roccat-kone.c b/drivers/hid/hid-roccat-kone.c index 602c188e9d86..6101816a7ddd 100644 --- a/drivers/hid/hid-roccat-kone.c +++ b/drivers/hid/hid-roccat-kone.c @@ -382,7 +382,7 @@ static ssize_t kone_sysfs_write_profilex(struct file *fp, } #define PROFILE_ATTR(number) \ static struct bin_attribute bin_attr_profile##number = { \ - .attr = { .name = "profile##number", .mode = 0660 }, \ + .attr = { .name = "profile" #number, .mode = 0660 }, \ .size = sizeof(struct kone_profile), \ .read = kone_sysfs_read_profilex, \ .write = kone_sysfs_write_profilex, \ diff --git a/drivers/hid/hid-roccat-koneplus.c b/drivers/hid/hid-roccat-koneplus.c index 5ddf605b6b89..5e99fcdc71b9 100644 --- a/drivers/hid/hid-roccat-koneplus.c +++ b/drivers/hid/hid-roccat-koneplus.c @@ -229,13 +229,13 @@ static ssize_t koneplus_sysfs_read_profilex_buttons(struct file *fp, #define PROFILE_ATTR(number) \ static struct bin_attribute bin_attr_profile##number##_settings = { \ - .attr = { .name = "profile##number##_settings", .mode = 0440 }, \ + .attr = { .name = "profile" #number "_settings", .mode = 0440 }, \ .size = KONEPLUS_SIZE_PROFILE_SETTINGS, \ .read = koneplus_sysfs_read_profilex_settings, \ .private = &profile_numbers[number-1], \ }; \ static struct bin_attribute bin_attr_profile##number##_buttons = { \ - .attr = { .name = "profile##number##_buttons", .mode = 0440 }, \ + .attr = { .name = "profile" #number "_buttons", .mode = 0440 }, \ .size = KONEPLUS_SIZE_PROFILE_BUTTONS, \ .read = koneplus_sysfs_read_profilex_buttons, \ .private = &profile_numbers[number-1], \ diff --git a/drivers/hid/hid-roccat-kovaplus.c b/drivers/hid/hid-roccat-kovaplus.c index 515bc03136c0..0c8e1ef0b67d 100644 --- a/drivers/hid/hid-roccat-kovaplus.c +++ b/drivers/hid/hid-roccat-kovaplus.c @@ -257,13 +257,13 @@ static ssize_t kovaplus_sysfs_read_profilex_buttons(struct file *fp, #define PROFILE_ATTR(number) \ static struct bin_attribute bin_attr_profile##number##_settings = { \ - .attr = { .name = "profile##number##_settings", .mode = 0440 }, \ + .attr = { .name = "profile" #number "_settings", .mode = 0440 }, \ .size = KOVAPLUS_SIZE_PROFILE_SETTINGS, \ .read = kovaplus_sysfs_read_profilex_settings, \ .private = &profile_numbers[number-1], \ }; \ static struct bin_attribute bin_attr_profile##number##_buttons = { \ - .attr = { .name = "profile##number##_buttons", .mode = 0440 }, \ + .attr = { .name = "profile" #number "_buttons", .mode = 0440 }, \ .size = KOVAPLUS_SIZE_PROFILE_BUTTONS, \ .read = kovaplus_sysfs_read_profilex_buttons, \ .private = &profile_numbers[number-1], \ diff --git a/drivers/hid/hid-roccat-pyra.c b/drivers/hid/hid-roccat-pyra.c index 5a6dbbeee790..1a07e07d99a0 100644 --- a/drivers/hid/hid-roccat-pyra.c +++ b/drivers/hid/hid-roccat-pyra.c @@ -225,13 +225,13 @@ static ssize_t pyra_sysfs_read_profilex_buttons(struct file *fp, #define PROFILE_ATTR(number) \ static struct bin_attribute bin_attr_profile##number##_settings = { \ - .attr = { .name = "profile##number##_settings", .mode = 0440 }, \ + .attr = { .name = "profile" #number "_settings", .mode = 0440 }, \ .size = PYRA_SIZE_PROFILE_SETTINGS, \ .read = pyra_sysfs_read_profilex_settings, \ .private = &profile_numbers[number-1], \ }; \ static struct bin_attribute bin_attr_profile##number##_buttons = { \ - .attr = { .name = "profile##number##_buttons", .mode = 0440 }, \ + .attr = { .name = "profile" #number "_buttons", .mode = 0440 }, \ .size = PYRA_SIZE_PROFILE_BUTTONS, \ .read = pyra_sysfs_read_profilex_buttons, \ .private = &profile_numbers[number-1], \ diff --git a/drivers/hid/hid-wiimote-modules.c b/drivers/hid/hid-wiimote-modules.c index 2e7d644dba18..71adf9e60b13 100644 --- a/drivers/hid/hid-wiimote-modules.c +++ b/drivers/hid/hid-wiimote-modules.c @@ -119,12 +119,22 @@ static const struct wiimod_ops wiimod_keys = { * the rumble motor, this flag shouldn't be set. */ +/* used by wiimod_rumble and wiipro_rumble */ +static void wiimod_rumble_worker(struct work_struct *work) +{ + struct wiimote_data *wdata = container_of(work, struct wiimote_data, + rumble_worker); + + spin_lock_irq(&wdata->state.lock); + wiiproto_req_rumble(wdata, wdata->state.cache_rumble); + spin_unlock_irq(&wdata->state.lock); +} + static int wiimod_rumble_play(struct input_dev *dev, void *data, struct ff_effect *eff) { struct wiimote_data *wdata = input_get_drvdata(dev); __u8 value; - unsigned long flags; /* * The wiimote supports only a single rumble motor so if any magnitude @@ -137,9 +147,10 @@ static int wiimod_rumble_play(struct input_dev *dev, void *data, else value = 0; - spin_lock_irqsave(&wdata->state.lock, flags); - wiiproto_req_rumble(wdata, value); - spin_unlock_irqrestore(&wdata->state.lock, flags); + /* Locking state.lock here might deadlock with input_event() calls. + * schedule_work acts as barrier. Merging multiple changes is fine. */ + wdata->state.cache_rumble = value; + schedule_work(&wdata->rumble_worker); return 0; } @@ -147,6 +158,8 @@ static int wiimod_rumble_play(struct input_dev *dev, void *data, static int wiimod_rumble_probe(const struct wiimod_ops *ops, struct wiimote_data *wdata) { + INIT_WORK(&wdata->rumble_worker, wiimod_rumble_worker); + set_bit(FF_RUMBLE, wdata->input->ffbit); if (input_ff_create_memless(wdata->input, NULL, wiimod_rumble_play)) return -ENOMEM; @@ -159,6 +172,8 @@ static void wiimod_rumble_remove(const struct wiimod_ops *ops, { unsigned long flags; + cancel_work_sync(&wdata->rumble_worker); + spin_lock_irqsave(&wdata->state.lock, flags); wiiproto_req_rumble(wdata, 0); spin_unlock_irqrestore(&wdata->state.lock, flags); @@ -1731,7 +1746,6 @@ static int wiimod_pro_play(struct input_dev *dev, void *data, { struct wiimote_data *wdata = input_get_drvdata(dev); __u8 value; - unsigned long flags; /* * The wiimote supports only a single rumble motor so if any magnitude @@ -1744,9 +1758,10 @@ static int wiimod_pro_play(struct input_dev *dev, void *data, else value = 0; - spin_lock_irqsave(&wdata->state.lock, flags); - wiiproto_req_rumble(wdata, value); - spin_unlock_irqrestore(&wdata->state.lock, flags); + /* Locking state.lock here might deadlock with input_event() calls. + * schedule_work acts as barrier. Merging multiple changes is fine. */ + wdata->state.cache_rumble = value; + schedule_work(&wdata->rumble_worker); return 0; } @@ -1756,6 +1771,8 @@ static int wiimod_pro_probe(const struct wiimod_ops *ops, { int ret, i; + INIT_WORK(&wdata->rumble_worker, wiimod_rumble_worker); + wdata->extension.input = input_allocate_device(); if (!wdata->extension.input) return -ENOMEM; @@ -1817,12 +1834,13 @@ static void wiimod_pro_remove(const struct wiimod_ops *ops, if (!wdata->extension.input) return; + input_unregister_device(wdata->extension.input); + wdata->extension.input = NULL; + cancel_work_sync(&wdata->rumble_worker); + spin_lock_irqsave(&wdata->state.lock, flags); wiiproto_req_rumble(wdata, 0); spin_unlock_irqrestore(&wdata->state.lock, flags); - - input_unregister_device(wdata->extension.input); - wdata->extension.input = NULL; } static const struct wiimod_ops wiimod_pro = { diff --git a/drivers/hid/hid-wiimote.h b/drivers/hid/hid-wiimote.h index f1474f372c0b..75db0c400037 100644 --- a/drivers/hid/hid-wiimote.h +++ b/drivers/hid/hid-wiimote.h @@ -133,13 +133,15 @@ struct wiimote_state { __u8 *cmd_read_buf; __u8 cmd_read_size; - /* calibration data */ + /* calibration/cache data */ __u16 calib_bboard[4][3]; + __u8 cache_rumble; }; struct wiimote_data { struct hid_device *hdev; struct input_dev *input; + struct work_struct rumble_worker; struct led_classdev *leds[4]; struct input_dev *accel; struct input_dev *ir; diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index 8918dd12bb69..6a6dd5cd7833 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -308,18 +308,25 @@ static int hidraw_fasync(int fd, struct file *file, int on) static void drop_ref(struct hidraw *hidraw, int exists_bit) { if (exists_bit) { - hid_hw_close(hidraw->hid); hidraw->exist = 0; - if (hidraw->open) + if (hidraw->open) { + hid_hw_close(hidraw->hid); wake_up_interruptible(&hidraw->wait); + } } else { --hidraw->open; } - - if (!hidraw->open && !hidraw->exist) { - device_destroy(hidraw_class, MKDEV(hidraw_major, hidraw->minor)); - hidraw_table[hidraw->minor] = NULL; - kfree(hidraw); + if (!hidraw->open) { + if (!hidraw->exist) { + device_destroy(hidraw_class, + MKDEV(hidraw_major, hidraw->minor)); + hidraw_table[hidraw->minor] = NULL; + kfree(hidraw); + } else { + /* close device for last reader */ + hid_hw_power(hidraw->hid, PM_HINT_NORMAL); + hid_hw_close(hidraw->hid); + } } } diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c index 5bf2fb785844..93b00d76374c 100644 --- a/drivers/hid/uhid.c +++ b/drivers/hid/uhid.c @@ -615,7 +615,7 @@ static const struct file_operations uhid_fops = { static struct miscdevice uhid_misc = { .fops = &uhid_fops, - .minor = MISC_DYNAMIC_MINOR, + .minor = UHID_MINOR, .name = UHID_NAME, }; @@ -634,4 +634,5 @@ module_exit(uhid_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("David Herrmann <dh.herrmann@gmail.com>"); MODULE_DESCRIPTION("User-space I/O driver support for HID subsystem"); +MODULE_ALIAS_MISCDEV(UHID_MINOR); MODULE_ALIAS("devname:" UHID_NAME); diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 8f4743ab5fb2..936093e0271e 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -195,7 +195,7 @@ int vmbus_connect(void) do { ret = vmbus_negotiate_version(msginfo, version); - if (ret) + if (ret == -ETIMEDOUT) goto cleanup; if (vmbus_connection.conn_state == CONNECTED) diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index 28b03325b872..09988b289622 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -32,13 +32,17 @@ /* * Pre win8 version numbers used in ws2008 and ws 2008 r2 (win7) */ +#define WS2008_SRV_MAJOR 1 +#define WS2008_SRV_MINOR 0 +#define WS2008_SRV_VERSION (WS2008_SRV_MAJOR << 16 | WS2008_SRV_MINOR) + #define WIN7_SRV_MAJOR 3 #define WIN7_SRV_MINOR 0 -#define WIN7_SRV_MAJOR_MINOR (WIN7_SRV_MAJOR << 16 | WIN7_SRV_MINOR) +#define WIN7_SRV_VERSION (WIN7_SRV_MAJOR << 16 | WIN7_SRV_MINOR) #define WIN8_SRV_MAJOR 4 #define WIN8_SRV_MINOR 0 -#define WIN8_SRV_MAJOR_MINOR (WIN8_SRV_MAJOR << 16 | WIN8_SRV_MINOR) +#define WIN8_SRV_VERSION (WIN8_SRV_MAJOR << 16 | WIN8_SRV_MINOR) /* * Global state maintained for transaction that is being processed. @@ -587,6 +591,8 @@ void hv_kvp_onchannelcallback(void *context) struct icmsg_hdr *icmsghdrp; struct icmsg_negotiate *negop = NULL; + int util_fw_version; + int kvp_srv_version; if (kvp_transaction.active) { /* @@ -606,17 +612,26 @@ void hv_kvp_onchannelcallback(void *context) if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { /* - * We start with win8 version and if the host cannot - * support that we use the previous version. + * Based on the host, select appropriate + * framework and service versions we will + * negotiate. */ - if (vmbus_prep_negotiate_resp(icmsghdrp, negop, - recv_buffer, UTIL_FW_MAJOR_MINOR, - WIN8_SRV_MAJOR_MINOR)) - goto done; - + switch (vmbus_proto_version) { + case (VERSION_WS2008): + util_fw_version = UTIL_WS2K8_FW_VERSION; + kvp_srv_version = WS2008_SRV_VERSION; + break; + case (VERSION_WIN7): + util_fw_version = UTIL_FW_VERSION; + kvp_srv_version = WIN7_SRV_VERSION; + break; + default: + util_fw_version = UTIL_FW_VERSION; + kvp_srv_version = WIN8_SRV_VERSION; + } vmbus_prep_negotiate_resp(icmsghdrp, negop, - recv_buffer, UTIL_FW_MAJOR_MINOR, - WIN7_SRV_MAJOR_MINOR); + recv_buffer, util_fw_version, + kvp_srv_version); } else { kvp_msg = (struct hv_kvp_msg *)&recv_buffer[ @@ -649,7 +664,6 @@ void hv_kvp_onchannelcallback(void *context) return; } -done: icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION | ICMSGHDRFLAG_RESPONSE; diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c index e4572f3f2834..0c3546224376 100644 --- a/drivers/hv/hv_snapshot.c +++ b/drivers/hv/hv_snapshot.c @@ -26,7 +26,7 @@ #define VSS_MAJOR 5 #define VSS_MINOR 0 -#define VSS_MAJOR_MINOR (VSS_MAJOR << 16 | VSS_MINOR) +#define VSS_VERSION (VSS_MAJOR << 16 | VSS_MINOR) @@ -190,8 +190,8 @@ void hv_vss_onchannelcallback(void *context) if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { vmbus_prep_negotiate_resp(icmsghdrp, negop, - recv_buffer, UTIL_FW_MAJOR_MINOR, - VSS_MAJOR_MINOR); + recv_buffer, UTIL_FW_VERSION, + VSS_VERSION); } else { vss_msg = (struct hv_vss_msg *)&recv_buffer[ sizeof(struct vmbuspipe_hdr) + diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c index cb82233541b1..273e3ddb3a20 100644 --- a/drivers/hv/hv_util.c +++ b/drivers/hv/hv_util.c @@ -28,17 +28,32 @@ #include <linux/reboot.h> #include <linux/hyperv.h> -#define SHUTDOWN_MAJOR 3 -#define SHUTDOWN_MINOR 0 -#define SHUTDOWN_MAJOR_MINOR (SHUTDOWN_MAJOR << 16 | SHUTDOWN_MINOR) -#define TIMESYNCH_MAJOR 3 -#define TIMESYNCH_MINOR 0 -#define TIMESYNCH_MAJOR_MINOR (TIMESYNCH_MAJOR << 16 | TIMESYNCH_MINOR) +#define SD_MAJOR 3 +#define SD_MINOR 0 +#define SD_VERSION (SD_MAJOR << 16 | SD_MINOR) -#define HEARTBEAT_MAJOR 3 -#define HEARTBEAT_MINOR 0 -#define HEARTBEAT_MAJOR_MINOR (HEARTBEAT_MAJOR << 16 | HEARTBEAT_MINOR) +#define SD_WS2008_MAJOR 1 +#define SD_WS2008_VERSION (SD_WS2008_MAJOR << 16 | SD_MINOR) + +#define TS_MAJOR 3 +#define TS_MINOR 0 +#define TS_VERSION (TS_MAJOR << 16 | TS_MINOR) + +#define TS_WS2008_MAJOR 1 +#define TS_WS2008_VERSION (TS_WS2008_MAJOR << 16 | TS_MINOR) + +#define HB_MAJOR 3 +#define HB_MINOR 0 +#define HB_VERSION (HB_MAJOR << 16 | HB_MINOR) + +#define HB_WS2008_MAJOR 1 +#define HB_WS2008_VERSION (HB_WS2008_MAJOR << 16 | HB_MINOR) + +static int sd_srv_version; +static int ts_srv_version; +static int hb_srv_version; +static int util_fw_version; static void shutdown_onchannelcallback(void *context); static struct hv_util_service util_shutdown = { @@ -99,8 +114,8 @@ static void shutdown_onchannelcallback(void *context) if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { vmbus_prep_negotiate_resp(icmsghdrp, negop, - shut_txf_buf, UTIL_FW_MAJOR_MINOR, - SHUTDOWN_MAJOR_MINOR); + shut_txf_buf, util_fw_version, + sd_srv_version); } else { shutdown_msg = (struct shutdown_msg_data *)&shut_txf_buf[ @@ -216,6 +231,7 @@ static void timesync_onchannelcallback(void *context) struct icmsg_hdr *icmsghdrp; struct ictimesync_data *timedatap; u8 *time_txf_buf = util_timesynch.recv_buffer; + struct icmsg_negotiate *negop = NULL; vmbus_recvpacket(channel, time_txf_buf, PAGE_SIZE, &recvlen, &requestid); @@ -225,9 +241,10 @@ static void timesync_onchannelcallback(void *context) sizeof(struct vmbuspipe_hdr)]; if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { - vmbus_prep_negotiate_resp(icmsghdrp, NULL, time_txf_buf, - UTIL_FW_MAJOR_MINOR, - TIMESYNCH_MAJOR_MINOR); + vmbus_prep_negotiate_resp(icmsghdrp, negop, + time_txf_buf, + util_fw_version, + ts_srv_version); } else { timedatap = (struct ictimesync_data *)&time_txf_buf[ sizeof(struct vmbuspipe_hdr) + @@ -257,6 +274,7 @@ static void heartbeat_onchannelcallback(void *context) struct icmsg_hdr *icmsghdrp; struct heartbeat_msg_data *heartbeat_msg; u8 *hbeat_txf_buf = util_heartbeat.recv_buffer; + struct icmsg_negotiate *negop = NULL; vmbus_recvpacket(channel, hbeat_txf_buf, PAGE_SIZE, &recvlen, &requestid); @@ -266,9 +284,9 @@ static void heartbeat_onchannelcallback(void *context) sizeof(struct vmbuspipe_hdr)]; if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { - vmbus_prep_negotiate_resp(icmsghdrp, NULL, - hbeat_txf_buf, UTIL_FW_MAJOR_MINOR, - HEARTBEAT_MAJOR_MINOR); + vmbus_prep_negotiate_resp(icmsghdrp, negop, + hbeat_txf_buf, util_fw_version, + hb_srv_version); } else { heartbeat_msg = (struct heartbeat_msg_data *)&hbeat_txf_buf[ @@ -321,6 +339,25 @@ static int util_probe(struct hv_device *dev, goto error; hv_set_drvdata(dev, srv); + /* + * Based on the host; initialize the framework and + * service version numbers we will negotiate. + */ + switch (vmbus_proto_version) { + case (VERSION_WS2008): + util_fw_version = UTIL_WS2K8_FW_VERSION; + sd_srv_version = SD_WS2008_VERSION; + ts_srv_version = TS_WS2008_VERSION; + hb_srv_version = HB_WS2008_VERSION; + break; + + default: + util_fw_version = UTIL_FW_VERSION; + sd_srv_version = SD_VERSION; + ts_srv_version = TS_VERSION; + hb_srv_version = HB_VERSION; + } + return 0; error: diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c index 62c2e32e25ef..3288f13d2d87 100644 --- a/drivers/hwmon/applesmc.c +++ b/drivers/hwmon/applesmc.c @@ -230,6 +230,7 @@ static int send_argument(const char *key) static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len) { + u8 status, data = 0; int i; if (send_command(cmd) || send_argument(key)) { @@ -237,6 +238,7 @@ static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len) return -EIO; } + /* This has no effect on newer (2012) SMCs */ if (send_byte(len, APPLESMC_DATA_PORT)) { pr_warn("%.4s: read len fail\n", key); return -EIO; @@ -250,6 +252,17 @@ static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len) buffer[i] = inb(APPLESMC_DATA_PORT); } + /* Read the data port until bit0 is cleared */ + for (i = 0; i < 16; i++) { + udelay(APPLESMC_MIN_WAIT); + status = inb(APPLESMC_CMD_PORT); + if (!(status & 0x01)) + break; + data = inb(APPLESMC_DATA_PORT); + } + if (i) + pr_warn("flushed %d bytes, last value is: %d\n", i, data); + return 0; } @@ -525,16 +538,25 @@ static int applesmc_init_smcreg_try(void) { struct applesmc_registers *s = &smcreg; bool left_light_sensor, right_light_sensor; + unsigned int count; u8 tmp[1]; int ret; if (s->init_complete) return 0; - ret = read_register_count(&s->key_count); + ret = read_register_count(&count); if (ret) return ret; + if (s->cache && s->key_count != count) { + pr_warn("key count changed from %d to %d\n", + s->key_count, count); + kfree(s->cache); + s->cache = NULL; + } + s->key_count = count; + if (!s->cache) s->cache = kcalloc(s->key_count, sizeof(*s->cache), GFP_KERNEL); if (!s->cache) diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c index dbecf08399f8..5888feef1ac5 100644 --- a/drivers/i2c/busses/i2c-designware-core.c +++ b/drivers/i2c/busses/i2c-designware-core.c @@ -98,6 +98,8 @@ #define DW_IC_ERR_TX_ABRT 0x1 +#define DW_IC_TAR_10BITADDR_MASTER BIT(12) + /* * status codes */ @@ -388,22 +390,34 @@ static int i2c_dw_wait_bus_not_busy(struct dw_i2c_dev *dev) static void i2c_dw_xfer_init(struct dw_i2c_dev *dev) { struct i2c_msg *msgs = dev->msgs; - u32 ic_con; + u32 ic_con, ic_tar = 0; /* Disable the adapter */ __i2c_dw_enable(dev, false); - /* set the slave (target) address */ - dw_writel(dev, msgs[dev->msg_write_idx].addr, DW_IC_TAR); - /* if the slave address is ten bit address, enable 10BITADDR */ ic_con = dw_readl(dev, DW_IC_CON); - if (msgs[dev->msg_write_idx].flags & I2C_M_TEN) + if (msgs[dev->msg_write_idx].flags & I2C_M_TEN) { ic_con |= DW_IC_CON_10BITADDR_MASTER; - else + /* + * If I2C_DYNAMIC_TAR_UPDATE is set, the 10-bit addressing + * mode has to be enabled via bit 12 of IC_TAR register. + * We set it always as I2C_DYNAMIC_TAR_UPDATE can't be + * detected from registers. + */ + ic_tar = DW_IC_TAR_10BITADDR_MASTER; + } else { ic_con &= ~DW_IC_CON_10BITADDR_MASTER; + } + dw_writel(dev, ic_con, DW_IC_CON); + /* + * Set the slave (target) address and enable 10-bit addressing mode + * if applicable. + */ + dw_writel(dev, msgs[dev->msg_write_idx].addr | ic_tar, DW_IC_TAR); + /* Enable the adapter */ __i2c_dw_enable(dev, true); diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 4c1b60539a25..0aa01136f8d9 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -270,7 +270,8 @@ static SIMPLE_DEV_PM_OPS(dw_i2c_dev_pm_ops, dw_i2c_suspend, dw_i2c_resume); MODULE_ALIAS("platform:i2c_designware"); static struct platform_driver dw_i2c_driver = { - .remove = dw_i2c_remove, + .probe = dw_i2c_probe, + .remove = dw_i2c_remove, .driver = { .name = "i2c_designware", .owner = THIS_MODULE, @@ -282,7 +283,7 @@ static struct platform_driver dw_i2c_driver = { static int __init dw_i2c_init_driver(void) { - return platform_driver_probe(&dw_i2c_driver, dw_i2c_probe); + return platform_driver_register(&dw_i2c_driver); } subsys_initcall(dw_i2c_init_driver); diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index ccf46656bdad..1d7efa3169cd 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -365,7 +365,7 @@ static void i2c_imx_stop(struct imx_i2c_struct *i2c_imx) clk_disable_unprepare(i2c_imx->clk); } -static void __init i2c_imx_set_clk(struct imx_i2c_struct *i2c_imx, +static void i2c_imx_set_clk(struct imx_i2c_struct *i2c_imx, unsigned int rate) { struct imx_i2c_clk_pair *i2c_clk_div = i2c_imx->hwdata->clk_div; @@ -589,7 +589,7 @@ static struct i2c_algorithm i2c_imx_algo = { .functionality = i2c_imx_func, }; -static int __init i2c_imx_probe(struct platform_device *pdev) +static int i2c_imx_probe(struct platform_device *pdev) { const struct of_device_id *of_id = of_match_device(i2c_imx_dt_ids, &pdev->dev); @@ -697,7 +697,7 @@ static int __init i2c_imx_probe(struct platform_device *pdev) return 0; /* Return OK */ } -static int __exit i2c_imx_remove(struct platform_device *pdev) +static int i2c_imx_remove(struct platform_device *pdev) { struct imx_i2c_struct *i2c_imx = platform_get_drvdata(pdev); @@ -715,7 +715,8 @@ static int __exit i2c_imx_remove(struct platform_device *pdev) } static struct platform_driver i2c_imx_driver = { - .remove = __exit_p(i2c_imx_remove), + .probe = i2c_imx_probe, + .remove = i2c_imx_remove, .driver = { .name = DRIVER_NAME, .owner = THIS_MODULE, @@ -726,7 +727,7 @@ static struct platform_driver i2c_imx_driver = { static int __init i2c_adap_imx_init(void) { - return platform_driver_probe(&i2c_imx_driver, i2c_imx_probe); + return platform_driver_register(&i2c_imx_driver); } subsys_initcall(i2c_adap_imx_init); diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index 8ed79a086f85..1672effbcebb 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -393,6 +393,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr, desc = &priv->hw[priv->head]; + /* Initialize the DMA buffer */ + memset(priv->dma_buffer, 0, sizeof(priv->dma_buffer)); + /* Initialize the descriptor */ memset(desc, 0, sizeof(struct ismt_desc)); desc->tgtaddr_rw = ISMT_DESC_ADDR_RW(addr, read_write); diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index 7f3a47443494..d3e9cc3153a9 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -234,9 +234,9 @@ static int mv64xxx_i2c_offload_msg(struct mv64xxx_i2c_data *drv_data) ctrl_reg |= MV64XXX_I2C_BRIDGE_CONTROL_WR | (msg->len - 1) << MV64XXX_I2C_BRIDGE_CONTROL_TX_SIZE_SHIFT; - writel_relaxed(data_reg_lo, + writel(data_reg_lo, drv_data->reg_base + MV64XXX_I2C_REG_TX_DATA_LO); - writel_relaxed(data_reg_hi, + writel(data_reg_hi, drv_data->reg_base + MV64XXX_I2C_REG_TX_DATA_HI); } else { @@ -697,6 +697,7 @@ static const struct of_device_id mv64xxx_i2c_of_match_table[] = { MODULE_DEVICE_TABLE(of, mv64xxx_i2c_of_match_table); #ifdef CONFIG_OF +#ifdef CONFIG_HAVE_CLK static int mv64xxx_calc_freq(const int tclk, const int n, const int m) { @@ -726,16 +727,12 @@ mv64xxx_find_baud_factors(const u32 req_freq, const u32 tclk, u32 *best_n, return false; return true; } +#endif /* CONFIG_HAVE_CLK */ static int mv64xxx_of_config(struct mv64xxx_i2c_data *drv_data, struct device *dev) { - const struct of_device_id *device; - struct device_node *np = dev->of_node; - u32 bus_freq, tclk; - int rc = 0; - /* CLK is mandatory when using DT to describe the i2c bus. We * need to know tclk in order to calculate bus clock * factors. @@ -744,6 +741,11 @@ mv64xxx_of_config(struct mv64xxx_i2c_data *drv_data, /* Have OF but no CLK */ return -ENODEV; #else + const struct of_device_id *device; + struct device_node *np = dev->of_node; + u32 bus_freq, tclk; + int rc = 0; + if (IS_ERR(drv_data->clk)) { rc = -ENODEV; goto out; diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c index f4a01675fa71..b7c857774708 100644 --- a/drivers/i2c/busses/i2c-mxs.c +++ b/drivers/i2c/busses/i2c-mxs.c @@ -780,12 +780,13 @@ static struct platform_driver mxs_i2c_driver = { .owner = THIS_MODULE, .of_match_table = mxs_i2c_dt_ids, }, + .probe = mxs_i2c_probe, .remove = mxs_i2c_remove, }; static int __init mxs_i2c_init(void) { - return platform_driver_probe(&mxs_i2c_driver, mxs_i2c_probe); + return platform_driver_register(&mxs_i2c_driver); } subsys_initcall(mxs_i2c_init); diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 6d8308d5dc4e..9967a6f9c2ff 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -939,6 +939,9 @@ omap_i2c_isr_thread(int this_irq, void *dev_id) /* * ProDB0017052: Clear ARDY bit twice */ + if (stat & OMAP_I2C_STAT_ARDY) + omap_i2c_ack_stat(dev, OMAP_I2C_STAT_ARDY); + if (stat & (OMAP_I2C_STAT_ARDY | OMAP_I2C_STAT_NACK | OMAP_I2C_STAT_AL)) { omap_i2c_ack_stat(dev, (OMAP_I2C_STAT_RRDY | diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c index 3535f3c0f7b4..3747b9bf67d6 100644 --- a/drivers/i2c/busses/i2c-s3c2410.c +++ b/drivers/i2c/busses/i2c-s3c2410.c @@ -1178,8 +1178,6 @@ static int s3c24xx_i2c_remove(struct platform_device *pdev) i2c_del_adapter(&i2c->adap); - clk_disable_unprepare(i2c->clk); - if (pdev->dev.of_node && IS_ERR(i2c->pctrl)) s3c24xx_i2c_dt_gpio_free(i2c); diff --git a/drivers/i2c/busses/i2c-stu300.c b/drivers/i2c/busses/i2c-stu300.c index f8f6f2e552db..04a17b9b38bb 100644 --- a/drivers/i2c/busses/i2c-stu300.c +++ b/drivers/i2c/busses/i2c-stu300.c @@ -859,8 +859,7 @@ static const struct i2c_algorithm stu300_algo = { .functionality = stu300_func, }; -static int __init -stu300_probe(struct platform_device *pdev) +static int stu300_probe(struct platform_device *pdev) { struct stu300_dev *dev; struct i2c_adapter *adap; @@ -966,8 +965,7 @@ static SIMPLE_DEV_PM_OPS(stu300_pm, stu300_suspend, stu300_resume); #define STU300_I2C_PM NULL #endif -static int __exit -stu300_remove(struct platform_device *pdev) +static int stu300_remove(struct platform_device *pdev) { struct stu300_dev *dev = platform_get_drvdata(pdev); @@ -989,13 +987,14 @@ static struct platform_driver stu300_i2c_driver = { .pm = STU300_I2C_PM, .of_match_table = stu300_dt_match, }, - .remove = __exit_p(stu300_remove), + .probe = stu300_probe, + .remove = stu300_remove, }; static int __init stu300_init(void) { - return platform_driver_probe(&stu300_i2c_driver, stu300_probe); + return platform_driver_register(&stu300_i2c_driver); } static void __exit stu300_exit(void) diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 29d3f045a2bf..3be58f89ac77 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -1134,6 +1134,9 @@ static void acpi_i2c_register_devices(struct i2c_adapter *adap) acpi_handle handle; acpi_status status; + if (!adap->dev.parent) + return; + handle = ACPI_HANDLE(adap->dev.parent); if (!handle) return; diff --git a/drivers/i2c/muxes/i2c-arb-gpio-challenge.c b/drivers/i2c/muxes/i2c-arb-gpio-challenge.c index 74b41ae690f3..928656e241dd 100644 --- a/drivers/i2c/muxes/i2c-arb-gpio-challenge.c +++ b/drivers/i2c/muxes/i2c-arb-gpio-challenge.c @@ -200,7 +200,7 @@ static int i2c_arbitrator_probe(struct platform_device *pdev) arb->parent = of_find_i2c_adapter_by_node(parent_np); if (!arb->parent) { dev_err(dev, "Cannot find parent bus\n"); - return -EINVAL; + return -EPROBE_DEFER; } /* Actually add the mux adapter */ diff --git a/drivers/i2c/muxes/i2c-mux-gpio.c b/drivers/i2c/muxes/i2c-mux-gpio.c index 5d4a99ba743e..a764da777f08 100644 --- a/drivers/i2c/muxes/i2c-mux-gpio.c +++ b/drivers/i2c/muxes/i2c-mux-gpio.c @@ -66,7 +66,7 @@ static int i2c_mux_gpio_probe_dt(struct gpiomux *mux, struct device_node *adapter_np, *child; struct i2c_adapter *adapter; unsigned *values, *gpios; - int i = 0; + int i = 0, ret; if (!np) return -ENODEV; @@ -79,7 +79,7 @@ static int i2c_mux_gpio_probe_dt(struct gpiomux *mux, adapter = of_find_i2c_adapter_by_node(adapter_np); if (!adapter) { dev_err(&pdev->dev, "Cannot find parent bus\n"); - return -ENODEV; + return -EPROBE_DEFER; } mux->data.parent = i2c_adapter_id(adapter); put_device(&adapter->dev); @@ -116,8 +116,12 @@ static int i2c_mux_gpio_probe_dt(struct gpiomux *mux, return -ENOMEM; } - for (i = 0; i < mux->data.n_gpios; i++) - gpios[i] = of_get_named_gpio(np, "mux-gpios", i); + for (i = 0; i < mux->data.n_gpios; i++) { + ret = of_get_named_gpio(np, "mux-gpios", i); + if (ret < 0) + return ret; + gpios[i] = ret; + } mux->data.gpios = gpios; @@ -177,7 +181,7 @@ static int i2c_mux_gpio_probe(struct platform_device *pdev) if (!parent) { dev_err(&pdev->dev, "Parent adapter (%d) not found\n", mux->data.parent); - return -ENODEV; + return -EPROBE_DEFER; } mux->parent = parent; diff --git a/drivers/i2c/muxes/i2c-mux-pinctrl.c b/drivers/i2c/muxes/i2c-mux-pinctrl.c index 69a91732ae65..68a37157377d 100644 --- a/drivers/i2c/muxes/i2c-mux-pinctrl.c +++ b/drivers/i2c/muxes/i2c-mux-pinctrl.c @@ -113,7 +113,7 @@ static int i2c_mux_pinctrl_parse_dt(struct i2c_mux_pinctrl *mux, adapter = of_find_i2c_adapter_by_node(adapter_np); if (!adapter) { dev_err(mux->dev, "Cannot find parent bus\n"); - return -ENODEV; + return -EPROBE_DEFER; } mux->pdata->parent_bus_num = i2c_adapter_id(adapter); put_device(&adapter->dev); @@ -211,7 +211,7 @@ static int i2c_mux_pinctrl_probe(struct platform_device *pdev) if (!mux->parent) { dev_err(&pdev->dev, "Parent adapter (%d) not found\n", mux->pdata->parent_bus_num); - ret = -ENODEV; + ret = -EPROBE_DEFER; goto err; } diff --git a/drivers/iio/amplifiers/ad8366.c b/drivers/iio/amplifiers/ad8366.c index d0a79a4bce1c..ba6f6a91dfff 100644 --- a/drivers/iio/amplifiers/ad8366.c +++ b/drivers/iio/amplifiers/ad8366.c @@ -185,10 +185,8 @@ static int ad8366_remove(struct spi_device *spi) iio_device_unregister(indio_dev); - if (!IS_ERR(reg)) { + if (!IS_ERR(reg)) regulator_disable(reg); - regulator_put(reg); - } return 0; } diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 8e84cd522e49..f95c6979efd8 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -852,7 +852,6 @@ static void iio_dev_release(struct device *device) iio_device_unregister_trigger_consumer(indio_dev); iio_device_unregister_eventset(indio_dev); iio_device_unregister_sysfs(indio_dev); - iio_device_unregister_debugfs(indio_dev); ida_simple_remove(&iio_ida, indio_dev->id); kfree(indio_dev); @@ -1087,6 +1086,7 @@ void iio_device_unregister(struct iio_dev *indio_dev) if (indio_dev->chrdev.dev) cdev_del(&indio_dev->chrdev); + iio_device_unregister_debugfs(indio_dev); iio_disable_all_buffers(indio_dev); diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c index e8d2849cc81d..cab3bc7494a2 100644 --- a/drivers/iio/magnetometer/st_magn_core.c +++ b/drivers/iio/magnetometer/st_magn_core.c @@ -29,9 +29,9 @@ #define ST_MAGN_NUMBER_DATA_CHANNELS 3 /* DEFAULT VALUE FOR SENSORS */ -#define ST_MAGN_DEFAULT_OUT_X_L_ADDR 0X04 -#define ST_MAGN_DEFAULT_OUT_Y_L_ADDR 0X08 -#define ST_MAGN_DEFAULT_OUT_Z_L_ADDR 0X06 +#define ST_MAGN_DEFAULT_OUT_X_H_ADDR 0X03 +#define ST_MAGN_DEFAULT_OUT_Y_H_ADDR 0X07 +#define ST_MAGN_DEFAULT_OUT_Z_H_ADDR 0X05 /* FULLSCALE */ #define ST_MAGN_FS_AVL_1300MG 1300 @@ -117,16 +117,16 @@ static const struct iio_chan_spec st_magn_16bit_channels[] = { ST_SENSORS_LSM_CHANNELS(IIO_MAGN, BIT(IIO_CHAN_INFO_RAW) | BIT(IIO_CHAN_INFO_SCALE), - ST_SENSORS_SCAN_X, 1, IIO_MOD_X, 's', IIO_LE, 16, 16, - ST_MAGN_DEFAULT_OUT_X_L_ADDR), + ST_SENSORS_SCAN_X, 1, IIO_MOD_X, 's', IIO_BE, 16, 16, + ST_MAGN_DEFAULT_OUT_X_H_ADDR), ST_SENSORS_LSM_CHANNELS(IIO_MAGN, BIT(IIO_CHAN_INFO_RAW) | BIT(IIO_CHAN_INFO_SCALE), - ST_SENSORS_SCAN_Y, 1, IIO_MOD_Y, 's', IIO_LE, 16, 16, - ST_MAGN_DEFAULT_OUT_Y_L_ADDR), + ST_SENSORS_SCAN_Y, 1, IIO_MOD_Y, 's', IIO_BE, 16, 16, + ST_MAGN_DEFAULT_OUT_Y_H_ADDR), ST_SENSORS_LSM_CHANNELS(IIO_MAGN, BIT(IIO_CHAN_INFO_RAW) | BIT(IIO_CHAN_INFO_SCALE), - ST_SENSORS_SCAN_Z, 1, IIO_MOD_Z, 's', IIO_LE, 16, 16, - ST_MAGN_DEFAULT_OUT_Z_L_ADDR), + ST_SENSORS_SCAN_Z, 1, IIO_MOD_Z, 's', IIO_BE, 16, 16, + ST_MAGN_DEFAULT_OUT_Z_H_ADDR), IIO_CHAN_SOFT_TIMESTAMP(3) }; diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c index d5d1929753e4..cedda25232be 100644 --- a/drivers/infiniband/hw/amso1100/c2_ae.c +++ b/drivers/infiniband/hw/amso1100/c2_ae.c @@ -141,7 +141,7 @@ static const char *to_qp_state_str(int state) return "C2_QP_STATE_ERROR"; default: return "<invalid QP state>"; - }; + } } void c2_ae_event(struct c2_dev *c2dev, u32 mq_index) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 3f831de9a4d8..b1a6cb3a2809 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -164,6 +164,7 @@ int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn) static int alloc_comp_eqs(struct mlx5_ib_dev *dev) { struct mlx5_eq_table *table = &dev->mdev.priv.eq_table; + char name[MLX5_MAX_EQ_NAME]; struct mlx5_eq *eq, *n; int ncomp_vec; int nent; @@ -180,11 +181,10 @@ static int alloc_comp_eqs(struct mlx5_ib_dev *dev) goto clean; } - snprintf(eq->name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i); + snprintf(name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i); err = mlx5_create_map_eq(&dev->mdev, eq, i + MLX5_EQ_VEC_COMP_BASE, nent, 0, - eq->name, - &dev->mdev.priv.uuari.uars[0]); + name, &dev->mdev.priv.uuari.uars[0]); if (err) { kfree(eq); goto clean; @@ -301,9 +301,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_srq_sge = max_rq_sg - 1; props->max_fast_reg_page_list_len = (unsigned int)-1; props->local_ca_ack_delay = dev->mdev.caps.local_ca_ack_delay; - props->atomic_cap = dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_ATOMIC ? - IB_ATOMIC_HCA : IB_ATOMIC_NONE; - props->masked_atomic_cap = IB_ATOMIC_HCA; + props->atomic_cap = IB_ATOMIC_NONE; + props->masked_atomic_cap = IB_ATOMIC_NONE; props->max_pkeys = be16_to_cpup((__be16 *)(out_mad->data + 28)); props->max_mcast_grp = 1 << dev->mdev.caps.log_max_mcg; props->max_mcast_qp_attach = dev->mdev.caps.max_qp_mcg; @@ -1006,6 +1005,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, ibev.device = &ibdev->ib_dev; ibev.element.port_num = port; + if (port < 1 || port > ibdev->num_ports) { + mlx5_ib_warn(ibdev, "warning: event on port %d\n", port); + return; + } + if (ibdev->ib_active) ib_dispatch_event(&ibev); } diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index bd41df95b6f0..3453580b1eb2 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -42,6 +42,10 @@ enum { DEF_CACHE_SIZE = 10, }; +enum { + MLX5_UMR_ALIGN = 2048 +}; + static __be64 *mr_align(__be64 *ptr, int align) { unsigned long mask = align - 1; @@ -61,13 +65,11 @@ static int order2idx(struct mlx5_ib_dev *dev, int order) static int add_keys(struct mlx5_ib_dev *dev, int c, int num) { - struct device *ddev = dev->ib_dev.dma_device; struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; struct mlx5_create_mkey_mbox_in *in; struct mlx5_ib_mr *mr; int npages = 1 << ent->order; - int size = sizeof(u64) * npages; int err = 0; int i; @@ -83,21 +85,6 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) } mr->order = ent->order; mr->umred = 1; - mr->pas = kmalloc(size + 0x3f, GFP_KERNEL); - if (!mr->pas) { - kfree(mr); - err = -ENOMEM; - goto out; - } - mr->dma = dma_map_single(ddev, mr_align(mr->pas, 0x40), size, - DMA_TO_DEVICE); - if (dma_mapping_error(ddev, mr->dma)) { - kfree(mr->pas); - kfree(mr); - err = -ENOMEM; - goto out; - } - in->seg.status = 1 << 6; in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2); in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); @@ -108,8 +95,6 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) sizeof(*in)); if (err) { mlx5_ib_warn(dev, "create mkey failed %d\n", err); - dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); - kfree(mr->pas); kfree(mr); goto out; } @@ -129,11 +114,9 @@ out: static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) { - struct device *ddev = dev->ib_dev.dma_device; struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; struct mlx5_ib_mr *mr; - int size; int err; int i; @@ -149,14 +132,10 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) ent->size--; spin_unlock(&ent->lock); err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); - if (err) { + if (err) mlx5_ib_warn(dev, "failed destroy mkey\n"); - } else { - size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40); - dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); - kfree(mr->pas); + else kfree(mr); - } } } @@ -408,13 +387,12 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) static void clean_keys(struct mlx5_ib_dev *dev, int c) { - struct device *ddev = dev->ib_dev.dma_device; struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; struct mlx5_ib_mr *mr; - int size; int err; + cancel_delayed_work(&ent->dwork); while (1) { spin_lock(&ent->lock); if (list_empty(&ent->head)) { @@ -427,14 +405,10 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) ent->size--; spin_unlock(&ent->lock); err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); - if (err) { + if (err) mlx5_ib_warn(dev, "failed destroy mkey\n"); - } else { - size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40); - dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); - kfree(mr->pas); + else kfree(mr); - } } } @@ -540,13 +514,15 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) int i; dev->cache.stopped = 1; - destroy_workqueue(dev->cache.wq); + flush_workqueue(dev->cache.wq); mlx5_mr_cache_debugfs_cleanup(dev); for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) clean_keys(dev, i); + destroy_workqueue(dev->cache.wq); + return 0; } @@ -675,10 +651,12 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, int page_shift, int order, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct device *ddev = dev->ib_dev.dma_device; struct umr_common *umrc = &dev->umrc; struct ib_send_wr wr, *bad; struct mlx5_ib_mr *mr; struct ib_sge sg; + int size = sizeof(u64) * npages; int err; int i; @@ -697,7 +675,22 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, if (!mr) return ERR_PTR(-EAGAIN); - mlx5_ib_populate_pas(dev, umem, page_shift, mr_align(mr->pas, 0x40), 1); + mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL); + if (!mr->pas) { + err = -ENOMEM; + goto error; + } + + mlx5_ib_populate_pas(dev, umem, page_shift, + mr_align(mr->pas, MLX5_UMR_ALIGN), 1); + + mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size, + DMA_TO_DEVICE); + if (dma_mapping_error(ddev, mr->dma)) { + kfree(mr->pas); + err = -ENOMEM; + goto error; + } memset(&wr, 0, sizeof(wr)); wr.wr_id = (u64)(unsigned long)mr; @@ -718,6 +711,9 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, wait_for_completion(&mr->done); up(&umrc->sem); + dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); + kfree(mr->pas); + if (mr->status != IB_WC_SUCCESS) { mlx5_ib_warn(dev, "reg umr failed\n"); err = -EFAULT; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 045f8cdbd303..5659ea880741 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -203,7 +203,7 @@ static int sq_overhead(enum ib_qp_type qp_type) switch (qp_type) { case IB_QPT_XRC_INI: - size = sizeof(struct mlx5_wqe_xrc_seg); + size += sizeof(struct mlx5_wqe_xrc_seg); /* fall through */ case IB_QPT_RC: size += sizeof(struct mlx5_wqe_ctrl_seg) + @@ -211,20 +211,23 @@ static int sq_overhead(enum ib_qp_type qp_type) sizeof(struct mlx5_wqe_raddr_seg); break; + case IB_QPT_XRC_TGT: + return 0; + case IB_QPT_UC: - size = sizeof(struct mlx5_wqe_ctrl_seg) + + size += sizeof(struct mlx5_wqe_ctrl_seg) + sizeof(struct mlx5_wqe_raddr_seg); break; case IB_QPT_UD: case IB_QPT_SMI: case IB_QPT_GSI: - size = sizeof(struct mlx5_wqe_ctrl_seg) + + size += sizeof(struct mlx5_wqe_ctrl_seg) + sizeof(struct mlx5_wqe_datagram_seg); break; case MLX5_IB_QPT_REG_UMR: - size = sizeof(struct mlx5_wqe_ctrl_seg) + + size += sizeof(struct mlx5_wqe_ctrl_seg) + sizeof(struct mlx5_wqe_umr_ctrl_seg) + sizeof(struct mlx5_mkey_seg); break; @@ -270,7 +273,8 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, return wqe_size; if (wqe_size > dev->mdev.caps.max_sq_desc_sz) { - mlx5_ib_dbg(dev, "\n"); + mlx5_ib_dbg(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n", + wqe_size, dev->mdev.caps.max_sq_desc_sz); return -EINVAL; } @@ -280,9 +284,15 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; + if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) { + mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n", + qp->sq.wqe_cnt, dev->mdev.caps.max_wqes); + return -ENOMEM; + } qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); qp->sq.max_gs = attr->cap.max_send_sge; - qp->sq.max_post = 1 << ilog2(wq_size / wqe_size); + qp->sq.max_post = wq_size / wqe_size; + attr->cap.max_send_wr = qp->sq.max_post; return wq_size; } @@ -1280,6 +1290,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q MLX5_QP_OPTPAR_Q_KEY, [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX | MLX5_QP_OPTPAR_Q_KEY, + [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | + MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PKEY_INDEX, }, }, [MLX5_QP_STATE_RTR] = { @@ -1314,6 +1329,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q [MLX5_QP_STATE_RTS] = { [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY, [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY, + [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE, + [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RNR_TIMEOUT | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RRE, }, }, }; @@ -1651,29 +1671,6 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, rseg->reserved = 0; } -static void set_atomic_seg(struct mlx5_wqe_atomic_seg *aseg, struct ib_send_wr *wr) -{ - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); - aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); - } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); - aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask); - } else { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); - aseg->compare = 0; - } -} - -static void set_masked_atomic_seg(struct mlx5_wqe_masked_atomic_seg *aseg, - struct ib_send_wr *wr) -{ - aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); - aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask); - aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); - aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask); -} - static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, struct ib_send_wr *wr) { @@ -2063,28 +2060,11 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - set_raddr_seg(seg, wr->wr.atomic.remote_addr, - wr->wr.atomic.rkey); - seg += sizeof(struct mlx5_wqe_raddr_seg); - - set_atomic_seg(seg, wr); - seg += sizeof(struct mlx5_wqe_atomic_seg); - - size += (sizeof(struct mlx5_wqe_raddr_seg) + - sizeof(struct mlx5_wqe_atomic_seg)) / 16; - break; - case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: - set_raddr_seg(seg, wr->wr.atomic.remote_addr, - wr->wr.atomic.rkey); - seg += sizeof(struct mlx5_wqe_raddr_seg); - - set_masked_atomic_seg(seg, wr); - seg += sizeof(struct mlx5_wqe_masked_atomic_seg); - - size += (sizeof(struct mlx5_wqe_raddr_seg) + - sizeof(struct mlx5_wqe_masked_atomic_seg)) / 16; - break; + mlx5_ib_warn(dev, "Atomic operations are not supported yet\n"); + err = -ENOSYS; + *bad_wr = wr; + goto out; case IB_WR_LOCAL_INV: next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 84d297afd6a9..0aa478bc291a 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -295,7 +295,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, mlx5_vfree(in); if (err) { mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err); - goto err_srq; + goto err_usr_kern_srq; } mlx5_ib_dbg(dev, "create SRQ with srqn 0x%x\n", srq->msrq.srqn); @@ -316,6 +316,8 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, err_core: mlx5_core_destroy_srq(&dev->mdev, &srq->msrq); + +err_usr_kern_srq: if (pd->uobject) destroy_srq_user(pd, srq); else diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 7c9d35f39d75..690201738993 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -357,7 +357,7 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq) mthca_warn(dev, "Unhandled event %02x(%02x) on EQ %d\n", eqe->type, eqe->subtype, eq->eqn); break; - }; + } set_eqe_hw(eqe); ++eq->cons_index; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 4ed8235d2d36..50219ab2279d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -150,7 +150,7 @@ enum ib_qp_state get_ibqp_state(enum ocrdma_qp_state qps) return IB_QPS_SQE; case OCRDMA_QPS_ERR: return IB_QPS_ERR; - }; + } return IB_QPS_ERR; } @@ -171,7 +171,7 @@ static enum ocrdma_qp_state get_ocrdma_qp_state(enum ib_qp_state qps) return OCRDMA_QPS_SQE; case IB_QPS_ERR: return OCRDMA_QPS_ERR; - }; + } return OCRDMA_QPS_ERR; } @@ -1982,7 +1982,7 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs, break; default: return -EINVAL; - }; + } cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_QP, sizeof(*cmd)); if (!cmd) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 56e004940f18..0ce7674621ea 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -531,7 +531,7 @@ static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event) case BE_DEV_DOWN: ocrdma_close(dev); break; - }; + } } static struct ocrdma_driver ocrdma_drv = { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 6e982bb43c31..69f1d1221a6b 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -141,7 +141,7 @@ static inline void get_link_speed_and_width(struct ocrdma_dev *dev, /* Unsupported */ *ib_speed = IB_SPEED_SDR; *ib_width = IB_WIDTH_1X; - }; + } } @@ -2331,7 +2331,7 @@ static enum ib_wc_status ocrdma_to_ibwc_err(u16 status) default: ibwc_status = IB_WC_GENERAL_ERR; break; - }; + } return ibwc_status; } @@ -2370,7 +2370,7 @@ static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc, pr_err("%s() invalid opcode received = 0x%x\n", __func__, hdr->cw & OCRDMA_WQE_OPCODE_MASK); break; - }; + } } static void ocrdma_set_cqe_status_flushed(struct ocrdma_qp *qp, diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 653ac6bfc57a..6c923c7039a1 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1588,7 +1588,7 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch, int resp_data_len; int resp_len; - resp_data_len = (rsp_code == SRP_TSK_MGMT_SUCCESS) ? 0 : 4; + resp_data_len = 4; resp_len = sizeof(*srp_rsp) + resp_data_len; srp_rsp = ioctx->ioctx.buf; @@ -1600,11 +1600,9 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch, + atomic_xchg(&ch->req_lim_delta, 0)); srp_rsp->tag = tag; - if (rsp_code != SRP_TSK_MGMT_SUCCESS) { - srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID; - srp_rsp->resp_data_len = cpu_to_be32(resp_data_len); - srp_rsp->data[3] = rsp_code; - } + srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID; + srp_rsp->resp_data_len = cpu_to_be32(resp_data_len); + srp_rsp->data[3] = rsp_code; return resp_len; } @@ -2358,6 +2356,8 @@ static void srpt_release_channel_work(struct work_struct *w) transport_deregister_session(se_sess); ch->sess = NULL; + ib_destroy_cm_id(ch->cm_id); + srpt_destroy_ch_ib(ch); srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, @@ -2368,8 +2368,6 @@ static void srpt_release_channel_work(struct work_struct *w) list_del(&ch->list); spin_unlock_irq(&sdev->spinlock); - ib_destroy_cm_id(ch->cm_id); - if (ch->release_done) complete(ch->release_done); diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index fe302e33f72e..c880ebaf1553 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -52,7 +52,7 @@ config AMD_IOMMU select PCI_PRI select PCI_PASID select IOMMU_API - depends on X86_64 && PCI && ACPI && X86_IO_APIC + depends on X86_64 && PCI && ACPI ---help--- With this option you can enable support for AMD IOMMU hardware in your system. An IOMMU is a hardware component which provides diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index f417e89e1e7e..181c9ba929cd 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -377,6 +377,7 @@ struct arm_smmu_cfg { u32 cbar; pgd_t *pgd; }; +#define INVALID_IRPTNDX 0xff #define ARM_SMMU_CB_ASID(cfg) ((cfg)->cbndx) #define ARM_SMMU_CB_VMID(cfg) ((cfg)->cbndx + 1) @@ -840,7 +841,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, if (IS_ERR_VALUE(ret)) { dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n", root_cfg->irptndx, irq); - root_cfg->irptndx = -1; + root_cfg->irptndx = INVALID_IRPTNDX; goto out_free_context; } @@ -869,7 +870,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); arm_smmu_tlb_inv_context(root_cfg); - if (root_cfg->irptndx != -1) { + if (root_cfg->irptndx != INVALID_IRPTNDX) { irq = smmu->irqs[smmu->num_global_irqs + root_cfg->irptndx]; free_irq(irq, domain); } @@ -1857,8 +1858,6 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) goto out_put_parent; } - arm_smmu_device_reset(smmu); - for (i = 0; i < smmu->num_global_irqs; ++i) { err = request_irq(smmu->irqs[i], arm_smmu_global_fault, @@ -1876,6 +1875,8 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) spin_lock(&arm_smmu_devices_lock); list_add(&smmu->list, &arm_smmu_devices); spin_unlock(&arm_smmu_devices_lock); + + arm_smmu_device_reset(smmu); return 0; out_free_irqs: @@ -1966,10 +1967,10 @@ static int __init arm_smmu_init(void) return ret; /* Oh, for a proper bus abstraction */ - if (!iommu_present(&platform_bus_type)); + if (!iommu_present(&platform_bus_type)) bus_set_iommu(&platform_bus_type, &arm_smmu_ops); - if (!iommu_present(&amba_bustype)); + if (!iommu_present(&amba_bustype)) bus_set_iommu(&amba_bustype, &arm_smmu_ops); return 0; diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index b39f6f0b45f2..0f12382aa35d 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -498,7 +498,7 @@ struct cached_dev { */ atomic_t has_dirty; - struct ratelimit writeback_rate; + struct bch_ratelimit writeback_rate; struct delayed_work writeback_rate_update; /* @@ -507,10 +507,9 @@ struct cached_dev { */ sector_t last_read; - /* Number of writeback bios in flight */ - atomic_t in_flight; + /* Limit number of writeback bios in flight */ + struct semaphore in_flight; struct closure_with_timer writeback; - struct closure_waitlist writeback_wait; struct keybuf writeback_keys; diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 8010eed06a51..22d1ae72c282 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -926,28 +926,45 @@ struct bkey *bch_next_recurse_key(struct btree *b, struct bkey *search) /* Mergesort */ +static void sort_key_next(struct btree_iter *iter, + struct btree_iter_set *i) +{ + i->k = bkey_next(i->k); + + if (i->k == i->end) + *i = iter->data[--iter->used]; +} + static void btree_sort_fixup(struct btree_iter *iter) { while (iter->used > 1) { struct btree_iter_set *top = iter->data, *i = top + 1; - struct bkey *k; if (iter->used > 2 && btree_iter_cmp(i[0], i[1])) i++; - for (k = i->k; - k != i->end && bkey_cmp(top->k, &START_KEY(k)) > 0; - k = bkey_next(k)) - if (top->k > i->k) - __bch_cut_front(top->k, k); - else if (KEY_SIZE(k)) - bch_cut_back(&START_KEY(k), top->k); - - if (top->k < i->k || k == i->k) + if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0) break; - heap_sift(iter, i - top, btree_iter_cmp); + if (!KEY_SIZE(i->k)) { + sort_key_next(iter, i); + heap_sift(iter, i - top, btree_iter_cmp); + continue; + } + + if (top->k > i->k) { + if (bkey_cmp(top->k, i->k) >= 0) + sort_key_next(iter, i); + else + bch_cut_front(top->k, i->k); + + heap_sift(iter, i - top, btree_iter_cmp); + } else { + /* can't happen because of comparison func */ + BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k))); + bch_cut_back(&START_KEY(i->k), top->k); + } } } diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index f9764e61978b..f42fc7ed9cd6 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -255,7 +255,7 @@ void bch_btree_node_read(struct btree *b) return; err: - bch_cache_set_error(b->c, "io error reading bucket %lu", + bch_cache_set_error(b->c, "io error reading bucket %zu", PTR_BUCKET_NR(b->c, &b->key, 0)); } @@ -612,7 +612,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink, return SHRINK_STOP; /* Return -1 if we can't do anything right now */ - if (sc->gfp_mask & __GFP_WAIT) + if (sc->gfp_mask & __GFP_IO) mutex_lock(&c->bucket_lock); else if (!mutex_trylock(&c->bucket_lock)) return -1; diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index ba95ab84b2be..8435f81e5d85 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -153,7 +153,8 @@ int bch_journal_read(struct cache_set *c, struct list_head *list, bitmap_zero(bitmap, SB_JOURNAL_BUCKETS); pr_debug("%u journal buckets", ca->sb.njournal_buckets); - /* Read journal buckets ordered by golden ratio hash to quickly + /* + * Read journal buckets ordered by golden ratio hash to quickly * find a sequence of buckets with valid journal entries */ for (i = 0; i < ca->sb.njournal_buckets; i++) { @@ -166,18 +167,20 @@ int bch_journal_read(struct cache_set *c, struct list_head *list, goto bsearch; } - /* If that fails, check all the buckets we haven't checked + /* + * If that fails, check all the buckets we haven't checked * already */ pr_debug("falling back to linear search"); - for (l = 0; l < ca->sb.njournal_buckets; l++) { - if (test_bit(l, bitmap)) - continue; - + for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets); + l < ca->sb.njournal_buckets; + l = find_next_zero_bit(bitmap, ca->sb.njournal_buckets, l + 1)) if (read_bucket(l)) goto bsearch; - } + + if (list_empty(list)) + continue; bsearch: /* Binary search */ m = r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1); @@ -197,10 +200,12 @@ bsearch: r = m; } - /* Read buckets in reverse order until we stop finding more + /* + * Read buckets in reverse order until we stop finding more * journal entries */ - pr_debug("finishing up"); + pr_debug("finishing up: m %u njournal_buckets %u", + m, ca->sb.njournal_buckets); l = m; while (1) { @@ -228,9 +233,10 @@ bsearch: } } - c->journal.seq = list_entry(list->prev, - struct journal_replay, - list)->j.seq; + if (!list_empty(list)) + c->journal.seq = list_entry(list->prev, + struct journal_replay, + list)->j.seq; return 0; #undef read_bucket @@ -428,7 +434,7 @@ static void do_journal_discard(struct cache *ca) return; } - switch (atomic_read(&ja->discard_in_flight) == DISCARD_IN_FLIGHT) { + switch (atomic_read(&ja->discard_in_flight)) { case DISCARD_IN_FLIGHT: return; @@ -689,6 +695,7 @@ void bch_journal_meta(struct cache_set *c, struct closure *cl) if (cl) BUG_ON(!closure_wait(&w->wait, cl)); + closure_flush(&c->journal.io); __journal_try_write(c, true); } } diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 786a1a4f74d8..b6a74bcbb08f 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -996,17 +996,19 @@ static void request_write(struct cached_dev *dc, struct search *s) closure_bio_submit(bio, cl, s->d); } else { bch_writeback_add(dc); + s->op.cache_bio = bio; - if (s->op.flush_journal) { + if (bio->bi_rw & REQ_FLUSH) { /* Also need to send a flush to the backing device */ - s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO, - dc->disk.bio_split); - - bio->bi_size = 0; - bio->bi_vcnt = 0; - closure_bio_submit(bio, cl, s->d); - } else { - s->op.cache_bio = bio; + struct bio *flush = bio_alloc_bioset(0, GFP_NOIO, + dc->disk.bio_split); + + flush->bi_rw = WRITE_FLUSH; + flush->bi_bdev = bio->bi_bdev; + flush->bi_end_io = request_endio; + flush->bi_private = cl; + + closure_bio_submit(flush, cl, s->d); } } out: diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 4fe6ab2fbe2e..924dcfdae111 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -223,8 +223,13 @@ STORE(__cached_dev) } if (attr == &sysfs_label) { - /* note: endlines are preserved */ - memcpy(dc->sb.label, buf, SB_LABEL_SIZE); + if (size > SB_LABEL_SIZE) + return -EINVAL; + memcpy(dc->sb.label, buf, size); + if (size < SB_LABEL_SIZE) + dc->sb.label[size] = '\0'; + if (size && dc->sb.label[size - 1] == '\n') + dc->sb.label[size - 1] = '\0'; bch_write_bdev_super(dc, NULL); if (dc->disk.c) { memcpy(dc->disk.c->uuids[dc->disk.id].label, diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index 98eb81159a22..420dad545c7d 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -190,7 +190,16 @@ void bch_time_stats_update(struct time_stats *stats, uint64_t start_time) stats->last = now ?: 1; } -unsigned bch_next_delay(struct ratelimit *d, uint64_t done) +/** + * bch_next_delay() - increment @d by the amount of work done, and return how + * long to delay until the next time to do some work. + * + * @d - the struct bch_ratelimit to update + * @done - the amount of work done, in arbitrary units + * + * Returns the amount of time to delay by, in jiffies + */ +uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done) { uint64_t now = local_clock(); diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index 1ae2a73ad85f..ea345c6896f4 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -450,17 +450,23 @@ read_attribute(name ## _last_ ## frequency_units) (ewma) >> factor; \ }) -struct ratelimit { +struct bch_ratelimit { + /* Next time we want to do some work, in nanoseconds */ uint64_t next; + + /* + * Rate at which we want to do work, in units per nanosecond + * The units here correspond to the units passed to bch_next_delay() + */ unsigned rate; }; -static inline void ratelimit_reset(struct ratelimit *d) +static inline void bch_ratelimit_reset(struct bch_ratelimit *d) { d->next = local_clock(); } -unsigned bch_next_delay(struct ratelimit *d, uint64_t done); +uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done); #define __DIV_SAFE(n, d, zero) \ ({ \ diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 22cbff551628..ba3ee48320f2 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -94,11 +94,15 @@ static void update_writeback_rate(struct work_struct *work) static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors) { + uint64_t ret; + if (atomic_read(&dc->disk.detaching) || !dc->writeback_percent) return 0; - return bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL); + ret = bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL); + + return min_t(uint64_t, ret, HZ); } /* Background writeback */ @@ -208,7 +212,7 @@ normal_refill: up_write(&dc->writeback_lock); - ratelimit_reset(&dc->writeback_rate); + bch_ratelimit_reset(&dc->writeback_rate); /* Punt to workqueue only so we don't recurse and blow the stack */ continue_at(cl, read_dirty, dirty_wq); @@ -318,9 +322,7 @@ static void write_dirty_finish(struct closure *cl) } bch_keybuf_del(&dc->writeback_keys, w); - atomic_dec_bug(&dc->in_flight); - - closure_wake_up(&dc->writeback_wait); + up(&dc->in_flight); closure_return_with_destructor(cl, dirty_io_destructor); } @@ -349,7 +351,7 @@ static void write_dirty(struct closure *cl) closure_bio_submit(&io->bio, cl, &io->dc->disk); - continue_at(cl, write_dirty_finish, dirty_wq); + continue_at(cl, write_dirty_finish, system_wq); } static void read_dirty_endio(struct bio *bio, int error) @@ -369,7 +371,7 @@ static void read_dirty_submit(struct closure *cl) closure_bio_submit(&io->bio, cl, &io->dc->disk); - continue_at(cl, write_dirty, dirty_wq); + continue_at(cl, write_dirty, system_wq); } static void read_dirty(struct closure *cl) @@ -394,12 +396,8 @@ static void read_dirty(struct closure *cl) if (delay > 0 && (KEY_START(&w->key) != dc->last_read || - jiffies_to_msecs(delay) > 50)) { - w->private = NULL; - - closure_delay(&dc->writeback, delay); - continue_at(cl, read_dirty, dirty_wq); - } + jiffies_to_msecs(delay) > 50)) + delay = schedule_timeout_uninterruptible(delay); dc->last_read = KEY_OFFSET(&w->key); @@ -424,15 +422,10 @@ static void read_dirty(struct closure *cl) trace_bcache_writeback(&w->key); - closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl); + down(&dc->in_flight); + closure_call(&io->cl, read_dirty_submit, NULL, cl); delay = writeback_delay(dc, KEY_SIZE(&w->key)); - - atomic_inc(&dc->in_flight); - - if (!closure_wait_event(&dc->writeback_wait, cl, - atomic_read(&dc->in_flight) < 64)) - continue_at(cl, read_dirty, dirty_wq); } if (0) { @@ -442,7 +435,11 @@ err: bch_keybuf_del(&dc->writeback_keys, w); } - refill_dirty(cl); + /* + * Wait for outstanding writeback IOs to finish (and keybuf slots to be + * freed) before refilling again + */ + continue_at(cl, refill_dirty, dirty_wq); } /* Init */ @@ -484,6 +481,7 @@ void bch_sectors_dirty_init(struct cached_dev *dc) void bch_cached_dev_writeback_init(struct cached_dev *dc) { + sema_init(&dc->in_flight, 64); closure_init_unlocked(&dc->writeback); init_rwsem(&dc->writeback_lock); @@ -513,7 +511,7 @@ void bch_writeback_exit(void) int __init bch_writeback_init(void) { - dirty_wq = create_singlethread_workqueue("bcache_writeback"); + dirty_wq = create_workqueue("bcache_writeback"); if (!dirty_wq) return -ENOMEM; diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index ea49834377c8..2a20986a2fec 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -19,8 +19,6 @@ #define DM_MSG_PREFIX "io" #define DM_IO_MAX_REGIONS BITS_PER_LONG -#define MIN_IOS 16 -#define MIN_BIOS 16 struct dm_io_client { mempool_t *pool; @@ -50,16 +48,17 @@ static struct kmem_cache *_dm_io_cache; struct dm_io_client *dm_io_client_create(void) { struct dm_io_client *client; + unsigned min_ios = dm_get_reserved_bio_based_ios(); client = kmalloc(sizeof(*client), GFP_KERNEL); if (!client) return ERR_PTR(-ENOMEM); - client->pool = mempool_create_slab_pool(MIN_IOS, _dm_io_cache); + client->pool = mempool_create_slab_pool(min_ios, _dm_io_cache); if (!client->pool) goto bad; - client->bios = bioset_create(MIN_BIOS, 0); + client->bios = bioset_create(min_ios, 0); if (!client->bios) goto bad; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index b759a127f9c3..de570a558764 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -7,6 +7,7 @@ #include <linux/device-mapper.h> +#include "dm.h" #include "dm-path-selector.h" #include "dm-uevent.h" @@ -116,8 +117,6 @@ struct dm_mpath_io { typedef int (*action_fn) (struct pgpath *pgpath); -#define MIN_IOS 256 /* Mempool size */ - static struct kmem_cache *_mpio_cache; static struct workqueue_struct *kmultipathd, *kmpath_handlerd; @@ -190,6 +189,7 @@ static void free_priority_group(struct priority_group *pg, static struct multipath *alloc_multipath(struct dm_target *ti) { struct multipath *m; + unsigned min_ios = dm_get_reserved_rq_based_ios(); m = kzalloc(sizeof(*m), GFP_KERNEL); if (m) { @@ -202,7 +202,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti) INIT_WORK(&m->trigger_event, trigger_event); init_waitqueue_head(&m->pg_init_wait); mutex_init(&m->work_mutex); - m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache); + m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache); if (!m->mpio_pool) { kfree(m); return NULL; @@ -1268,6 +1268,7 @@ static int noretry_error(int error) case -EREMOTEIO: case -EILSEQ: case -ENODATA: + case -ENOSPC: return 1; } @@ -1298,8 +1299,17 @@ static int do_end_io(struct multipath *m, struct request *clone, if (!error && !clone->errors) return 0; /* I/O complete */ - if (noretry_error(error)) + if (noretry_error(error)) { + if ((clone->cmd_flags & REQ_WRITE_SAME) && + !clone->q->limits.max_write_same_sectors) { + struct queue_limits *limits; + + /* device doesn't really support WRITE SAME, disable it */ + limits = dm_get_queue_limits(dm_table_get_md(m->ti->table)); + limits->max_write_same_sectors = 0; + } return error; + } if (mpio->pgpath) fail_path(mpio->pgpath); diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 3ac415675b6c..4caa8e6d59d7 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -256,7 +256,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw, */ INIT_WORK_ONSTACK(&req.work, do_metadata); queue_work(ps->metadata_wq, &req.work); - flush_work(&req.work); + flush_workqueue(ps->metadata_wq); return req.result; } diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index c434e5aab2df..aec57d76db5d 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -725,17 +725,16 @@ static int calc_max_buckets(void) */ static int init_hash_tables(struct dm_snapshot *s) { - sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets; + sector_t hash_size, cow_dev_size, max_buckets; /* * Calculate based on the size of the original volume or * the COW volume... */ cow_dev_size = get_dev_size(s->cow->bdev); - origin_dev_size = get_dev_size(s->origin->bdev); max_buckets = calc_max_buckets(); - hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift; + hash_size = cow_dev_size >> s->store->chunk_shift; hash_size = min(hash_size, max_buckets); if (hash_size < 64) diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index 8ae31e8d3d64..3d404c1371ed 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -451,19 +451,26 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry, struct dm_stat_percpu *p; /* - * For strict correctness we should use local_irq_disable/enable + * For strict correctness we should use local_irq_save/restore * instead of preempt_disable/enable. * - * This is racy if the driver finishes bios from non-interrupt - * context as well as from interrupt context or from more different - * interrupts. + * preempt_disable/enable is racy if the driver finishes bios + * from non-interrupt context as well as from interrupt context + * or from more different interrupts. * - * However, the race only results in not counting some events, - * so it is acceptable. + * On 64-bit architectures the race only results in not counting some + * events, so it is acceptable. On 32-bit architectures the race could + * cause the counter going off by 2^32, so we need to do proper locking + * there. * * part_stat_lock()/part_stat_unlock() have this race too. */ +#if BITS_PER_LONG == 32 + unsigned long flags; + local_irq_save(flags); +#else preempt_disable(); +#endif p = &s->stat_percpu[smp_processor_id()][entry]; if (!end) { @@ -478,7 +485,11 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry, p->ticks[idx] += duration; } +#if BITS_PER_LONG == 32 + local_irq_restore(flags); +#else preempt_enable(); +#endif } static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw, diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index ed063427d676..2c0cf511ec23 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2095,6 +2095,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) * them down to the data device. The thin device's discard * processing will cause mappings to be removed from the btree. */ + ti->discard_zeroes_data_unsupported = true; if (pf.discard_enabled && pf.discard_passdown) { ti->num_discard_bios = 1; @@ -2104,7 +2105,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) * thin devices' discard limits consistent). */ ti->discards_supported = true; - ti->discard_zeroes_data_unsupported = true; } ti->private = pt; @@ -2689,8 +2689,16 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) * They get transferred to the live pool in bind_control_target() * called from pool_preresume(). */ - if (!pt->adjusted_pf.discard_enabled) + if (!pt->adjusted_pf.discard_enabled) { + /* + * Must explicitly disallow stacking discard limits otherwise the + * block layer will stack them if pool's data device has support. + * QUEUE_FLAG_DISCARD wouldn't be set but there is no way for the + * user to see that, so make sure to set all discard limits to 0. + */ + limits->discard_granularity = 0; return; + } disable_passdown_if_not_supported(pt); @@ -2826,10 +2834,10 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook); /* In case the pool supports discards, pass them on. */ + ti->discard_zeroes_data_unsupported = true; if (tc->pool->pf.discard_enabled) { ti->discards_supported = true; ti->num_discard_bios = 1; - ti->discard_zeroes_data_unsupported = true; /* Discard bios must be split on a block boundary */ ti->split_discard_bios = true; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 6a5e9ed2fcc3..b3e26c7d1417 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -211,10 +211,55 @@ struct dm_md_mempools { struct bio_set *bs; }; -#define MIN_IOS 256 +#define RESERVED_BIO_BASED_IOS 16 +#define RESERVED_REQUEST_BASED_IOS 256 +#define RESERVED_MAX_IOS 1024 static struct kmem_cache *_io_cache; static struct kmem_cache *_rq_tio_cache; +/* + * Bio-based DM's mempools' reserved IOs set by the user. + */ +static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS; + +/* + * Request-based DM's mempools' reserved IOs set by the user. + */ +static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS; + +static unsigned __dm_get_reserved_ios(unsigned *reserved_ios, + unsigned def, unsigned max) +{ + unsigned ios = ACCESS_ONCE(*reserved_ios); + unsigned modified_ios = 0; + + if (!ios) + modified_ios = def; + else if (ios > max) + modified_ios = max; + + if (modified_ios) { + (void)cmpxchg(reserved_ios, ios, modified_ios); + ios = modified_ios; + } + + return ios; +} + +unsigned dm_get_reserved_bio_based_ios(void) +{ + return __dm_get_reserved_ios(&reserved_bio_based_ios, + RESERVED_BIO_BASED_IOS, RESERVED_MAX_IOS); +} +EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios); + +unsigned dm_get_reserved_rq_based_ios(void) +{ + return __dm_get_reserved_ios(&reserved_rq_based_ios, + RESERVED_REQUEST_BASED_IOS, RESERVED_MAX_IOS); +} +EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios); + static int __init local_init(void) { int r = -ENOMEM; @@ -2278,6 +2323,17 @@ struct target_type *dm_get_immutable_target_type(struct mapped_device *md) } /* + * The queue_limits are only valid as long as you have a reference + * count on 'md'. + */ +struct queue_limits *dm_get_queue_limits(struct mapped_device *md) +{ + BUG_ON(!atomic_read(&md->holders)); + return &md->queue->limits; +} +EXPORT_SYMBOL_GPL(dm_get_queue_limits); + +/* * Fully initialize a request-based queue (->elevator, ->request_fn, etc). */ static int dm_init_request_based_queue(struct mapped_device *md) @@ -2862,18 +2918,18 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, u if (type == DM_TYPE_BIO_BASED) { cachep = _io_cache; - pool_size = 16; + pool_size = dm_get_reserved_bio_based_ios(); front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); } else if (type == DM_TYPE_REQUEST_BASED) { cachep = _rq_tio_cache; - pool_size = MIN_IOS; + pool_size = dm_get_reserved_rq_based_ios(); front_pad = offsetof(struct dm_rq_clone_bio_info, clone); /* per_bio_data_size is not used. See __bind_mempools(). */ WARN_ON(per_bio_data_size != 0); } else goto out; - pools->io_pool = mempool_create_slab_pool(MIN_IOS, cachep); + pools->io_pool = mempool_create_slab_pool(pool_size, cachep); if (!pools->io_pool) goto out; @@ -2924,6 +2980,13 @@ module_exit(dm_exit); module_param(major, uint, 0); MODULE_PARM_DESC(major, "The major number of the device mapper"); + +module_param(reserved_bio_based_ios, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools"); + +module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools"); + MODULE_DESCRIPTION(DM_NAME " driver"); MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 5e604cc7b4aa..1d1ad7b7e527 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -184,6 +184,9 @@ void dm_free_md_mempools(struct dm_md_mempools *pools); /* * Helpers that are used by DM core */ +unsigned dm_get_reserved_bio_based_ios(void); +unsigned dm_get_reserved_rq_based_ios(void); + static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen) { return !maxlen || strlen(result) + 1 >= maxlen; diff --git a/drivers/misc/mei/amthif.c b/drivers/misc/mei/amthif.c index d0fdc134068a..f6ff711aa5bb 100644 --- a/drivers/misc/mei/amthif.c +++ b/drivers/misc/mei/amthif.c @@ -57,6 +57,7 @@ void mei_amthif_reset_params(struct mei_device *dev) dev->iamthif_ioctl = false; dev->iamthif_state = MEI_IAMTHIF_IDLE; dev->iamthif_timer = 0; + dev->iamthif_stall_timer = 0; } /** diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 6d0282c08a06..cd2033cd7120 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -297,10 +297,13 @@ int __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length) if (cl->reading_state != MEI_READ_COMPLETE && !waitqueue_active(&cl->rx_wait)) { + mutex_unlock(&dev->device_lock); if (wait_event_interruptible(cl->rx_wait, - (MEI_READ_COMPLETE == cl->reading_state))) { + cl->reading_state == MEI_READ_COMPLETE || + mei_cl_is_transitioning(cl))) { + if (signal_pending(current)) return -EINTR; return -ERESTARTSYS; diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h index 9eb031e92070..892cc4207fa2 100644 --- a/drivers/misc/mei/client.h +++ b/drivers/misc/mei/client.h @@ -90,6 +90,12 @@ static inline bool mei_cl_is_connected(struct mei_cl *cl) cl->dev->dev_state == MEI_DEV_ENABLED && cl->state == MEI_FILE_CONNECTED); } +static inline bool mei_cl_is_transitioning(struct mei_cl *cl) +{ + return (MEI_FILE_INITIALIZING == cl->state || + MEI_FILE_DISCONNECTED == cl->state || + MEI_FILE_DISCONNECTING == cl->state); +} bool mei_cl_is_other_connecting(struct mei_cl *cl); int mei_cl_disconnect(struct mei_cl *cl); diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c index 6127ab64bb39..0a0448326e9d 100644 --- a/drivers/misc/mei/hbm.c +++ b/drivers/misc/mei/hbm.c @@ -35,11 +35,15 @@ static void mei_hbm_me_cl_allocate(struct mei_device *dev) struct mei_me_client *clients; int b; + dev->me_clients_num = 0; + dev->me_client_presentation_num = 0; + dev->me_client_index = 0; + /* count how many ME clients we have */ for_each_set_bit(b, dev->me_clients_map, MEI_CLIENTS_MAX) dev->me_clients_num++; - if (dev->me_clients_num <= 0) + if (dev->me_clients_num == 0) return; kfree(dev->me_clients); @@ -221,7 +225,7 @@ static int mei_hbm_prop_req(struct mei_device *dev) struct hbm_props_request *prop_req; const size_t len = sizeof(struct hbm_props_request); unsigned long next_client_index; - u8 client_num; + unsigned long client_num; client_num = dev->me_client_presentation_num; @@ -677,8 +681,6 @@ void mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr) if (dev->dev_state == MEI_DEV_INIT_CLIENTS && dev->hbm_state == MEI_HBM_ENUM_CLIENTS) { dev->init_clients_timer = 0; - dev->me_client_presentation_num = 0; - dev->me_client_index = 0; mei_hbm_me_cl_allocate(dev); dev->hbm_state = MEI_HBM_CLIENT_PROPERTIES; diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c index 92c73118b13c..6197018e2f16 100644 --- a/drivers/misc/mei/init.c +++ b/drivers/misc/mei/init.c @@ -175,6 +175,9 @@ void mei_reset(struct mei_device *dev, int interrupts_enabled) memset(&dev->wr_ext_msg, 0, sizeof(dev->wr_ext_msg)); } + /* we're already in reset, cancel the init timer */ + dev->init_clients_timer = 0; + dev->me_clients_num = 0; dev->rd_msg_hdr = 0; dev->wd_pending = false; diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c index 173ff095be0d..cabeddd66c1f 100644 --- a/drivers/misc/mei/main.c +++ b/drivers/misc/mei/main.c @@ -249,19 +249,16 @@ static ssize_t mei_read(struct file *file, char __user *ubuf, mutex_unlock(&dev->device_lock); if (wait_event_interruptible(cl->rx_wait, - (MEI_READ_COMPLETE == cl->reading_state || - MEI_FILE_INITIALIZING == cl->state || - MEI_FILE_DISCONNECTED == cl->state || - MEI_FILE_DISCONNECTING == cl->state))) { + MEI_READ_COMPLETE == cl->reading_state || + mei_cl_is_transitioning(cl))) { + if (signal_pending(current)) return -EINTR; return -ERESTARTSYS; } mutex_lock(&dev->device_lock); - if (MEI_FILE_INITIALIZING == cl->state || - MEI_FILE_DISCONNECTED == cl->state || - MEI_FILE_DISCONNECTING == cl->state) { + if (mei_cl_is_transitioning(cl)) { rets = -EBUSY; goto out; } diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h index 7b918b2fb894..456b322013e2 100644 --- a/drivers/misc/mei/mei_dev.h +++ b/drivers/misc/mei/mei_dev.h @@ -396,9 +396,9 @@ struct mei_device { struct mei_me_client *me_clients; /* Note: memory has to be allocated */ DECLARE_BITMAP(me_clients_map, MEI_CLIENTS_MAX); DECLARE_BITMAP(host_clients_map, MEI_CLIENTS_MAX); - u8 me_clients_num; - u8 me_client_presentation_num; - u8 me_client_index; + unsigned long me_clients_num; + unsigned long me_client_presentation_num; + unsigned long me_client_index; struct mei_cl wd_cl; enum mei_wd_states wd_state; diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c index 87ed3fb5149a..f344659dceac 100644 --- a/drivers/mmc/host/sh_mobile_sdhi.c +++ b/drivers/mmc/host/sh_mobile_sdhi.c @@ -113,14 +113,14 @@ static const struct sh_mobile_sdhi_ops sdhi_ops = { }; static const struct of_device_id sh_mobile_sdhi_of_match[] = { - { .compatible = "renesas,shmobile-sdhi" }, - { .compatible = "renesas,sh7372-sdhi" }, - { .compatible = "renesas,sh73a0-sdhi", .data = &sh_mobile_sdhi_of_cfg[0], }, - { .compatible = "renesas,r8a73a4-sdhi", .data = &sh_mobile_sdhi_of_cfg[0], }, - { .compatible = "renesas,r8a7740-sdhi", .data = &sh_mobile_sdhi_of_cfg[0], }, - { .compatible = "renesas,r8a7778-sdhi", .data = &sh_mobile_sdhi_of_cfg[0], }, - { .compatible = "renesas,r8a7779-sdhi", .data = &sh_mobile_sdhi_of_cfg[0], }, - { .compatible = "renesas,r8a7790-sdhi", .data = &sh_mobile_sdhi_of_cfg[0], }, + { .compatible = "renesas,sdhi-shmobile" }, + { .compatible = "renesas,sdhi-sh7372" }, + { .compatible = "renesas,sdhi-sh73a0", .data = &sh_mobile_sdhi_of_cfg[0], }, + { .compatible = "renesas,sdhi-r8a73a4", .data = &sh_mobile_sdhi_of_cfg[0], }, + { .compatible = "renesas,sdhi-r8a7740", .data = &sh_mobile_sdhi_of_cfg[0], }, + { .compatible = "renesas,sdhi-r8a7778", .data = &sh_mobile_sdhi_of_cfg[0], }, + { .compatible = "renesas,sdhi-r8a7779", .data = &sh_mobile_sdhi_of_cfg[0], }, + { .compatible = "renesas,sdhi-r8a7790", .data = &sh_mobile_sdhi_of_cfg[0], }, {}, }; MODULE_DEVICE_TABLE(of, sh_mobile_sdhi_of_match); diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c index 26b14f9fcac6..6bc9618af094 100644 --- a/drivers/mtd/devices/m25p80.c +++ b/drivers/mtd/devices/m25p80.c @@ -168,12 +168,25 @@ static inline int write_disable(struct m25p *flash) */ static inline int set_4byte(struct m25p *flash, u32 jedec_id, int enable) { + int status; + bool need_wren = false; + switch (JEDEC_MFR(jedec_id)) { - case CFI_MFR_MACRONIX: case CFI_MFR_ST: /* Micron, actually */ + /* Some Micron need WREN command; all will accept it */ + need_wren = true; + case CFI_MFR_MACRONIX: case 0xEF /* winbond */: + if (need_wren) + write_enable(flash); + flash->command[0] = enable ? OPCODE_EN4B : OPCODE_EX4B; - return spi_write(flash->spi, flash->command, 1); + status = spi_write(flash->spi, flash->command, 1); + + if (need_wren) + write_disable(flash); + + return status; default: /* Spansion style */ flash->command[0] = OPCODE_BRWR; diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 7ed4841327f2..d340b2f198c6 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2869,10 +2869,8 @@ static int nand_flash_detect_ext_param_page(struct mtd_info *mtd, len = le16_to_cpu(p->ext_param_page_length) * 16; ep = kmalloc(len, GFP_KERNEL); - if (!ep) { - ret = -ENOMEM; - goto ext_out; - } + if (!ep) + return -ENOMEM; /* Send our own NAND_CMD_PARAM. */ chip->cmdfunc(mtd, NAND_CMD_PARAM, 0, -1); @@ -2920,7 +2918,7 @@ static int nand_flash_detect_ext_param_page(struct mtd_info *mtd, } pr_info("ONFI extended param page detected.\n"); - return 0; + ret = 0; ext_out: kfree(ep); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 55bbb8b8200c..e883bfe2e727 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1724,6 +1724,7 @@ static int __bond_release_one(struct net_device *bond_dev, struct bonding *bond = netdev_priv(bond_dev); struct slave *slave, *oldcurrent; struct sockaddr addr; + int old_flags = bond_dev->flags; netdev_features_t old_features = bond_dev->features; /* slave is not a slave or master is not master of this slave */ @@ -1855,12 +1856,18 @@ static int __bond_release_one(struct net_device *bond_dev, * bond_change_active_slave(..., NULL) */ if (!USES_PRIMARY(bond->params.mode)) { - /* unset promiscuity level from slave */ - if (bond_dev->flags & IFF_PROMISC) + /* unset promiscuity level from slave + * NOTE: The NETDEV_CHANGEADDR call above may change the value + * of the IFF_PROMISC flag in the bond_dev, but we need the + * value of that flag before that change, as that was the value + * when this slave was attached, so we cache at the start of the + * function and use it here. Same goes for ALLMULTI below + */ + if (old_flags & IFF_PROMISC) dev_set_promiscuity(slave_dev, -1); /* unset allmulti level from slave */ - if (bond_dev->flags & IFF_ALLMULTI) + if (old_flags & IFF_ALLMULTI) dev_set_allmulti(slave_dev, -1); bond_hw_addr_flush(bond_dev, slave_dev); diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 71c677e651d7..3f21142138b7 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -702,7 +702,6 @@ static int flexcan_chip_start(struct net_device *dev) { struct flexcan_priv *priv = netdev_priv(dev); struct flexcan_regs __iomem *regs = priv->base; - unsigned int i; int err; u32 reg_mcr, reg_ctrl; @@ -772,17 +771,6 @@ static int flexcan_chip_start(struct net_device *dev) netdev_dbg(dev, "%s: writing ctrl=0x%08x", __func__, reg_ctrl); flexcan_write(reg_ctrl, ®s->ctrl); - for (i = 0; i < ARRAY_SIZE(regs->cantxfg); i++) { - flexcan_write(0, ®s->cantxfg[i].can_ctrl); - flexcan_write(0, ®s->cantxfg[i].can_id); - flexcan_write(0, ®s->cantxfg[i].data[0]); - flexcan_write(0, ®s->cantxfg[i].data[1]); - - /* put MB into rx queue */ - flexcan_write(FLEXCAN_MB_CNT_CODE(0x4), - ®s->cantxfg[i].can_ctrl); - } - /* acceptance mask/acceptance code (accept everything) */ flexcan_write(0x0, ®s->rxgmask); flexcan_write(0x0, ®s->rx14mask); diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index 874188ba06f7..25377e547f9b 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -76,6 +76,10 @@ MODULE_PARM_DESC(maxdev, "Maximum number of slcan interfaces"); /* maximum rx buffer len: extended CAN frame with timestamp */ #define SLC_MTU (sizeof("T1111222281122334455667788EA5F\r")+1) +#define SLC_CMD_LEN 1 +#define SLC_SFF_ID_LEN 3 +#define SLC_EFF_ID_LEN 8 + struct slcan { int magic; @@ -142,47 +146,63 @@ static void slc_bump(struct slcan *sl) { struct sk_buff *skb; struct can_frame cf; - int i, dlc_pos, tmp; - unsigned long ultmp; - char cmd = sl->rbuff[0]; - - if ((cmd != 't') && (cmd != 'T') && (cmd != 'r') && (cmd != 'R')) + int i, tmp; + u32 tmpid; + char *cmd = sl->rbuff; + + cf.can_id = 0; + + switch (*cmd) { + case 'r': + cf.can_id = CAN_RTR_FLAG; + /* fallthrough */ + case 't': + /* store dlc ASCII value and terminate SFF CAN ID string */ + cf.can_dlc = sl->rbuff[SLC_CMD_LEN + SLC_SFF_ID_LEN]; + sl->rbuff[SLC_CMD_LEN + SLC_SFF_ID_LEN] = 0; + /* point to payload data behind the dlc */ + cmd += SLC_CMD_LEN + SLC_SFF_ID_LEN + 1; + break; + case 'R': + cf.can_id = CAN_RTR_FLAG; + /* fallthrough */ + case 'T': + cf.can_id |= CAN_EFF_FLAG; + /* store dlc ASCII value and terminate EFF CAN ID string */ + cf.can_dlc = sl->rbuff[SLC_CMD_LEN + SLC_EFF_ID_LEN]; + sl->rbuff[SLC_CMD_LEN + SLC_EFF_ID_LEN] = 0; + /* point to payload data behind the dlc */ + cmd += SLC_CMD_LEN + SLC_EFF_ID_LEN + 1; + break; + default: return; + } - if (cmd & 0x20) /* tiny chars 'r' 't' => standard frame format */ - dlc_pos = 4; /* dlc position tiiid */ - else - dlc_pos = 9; /* dlc position Tiiiiiiiid */ - - if (!((sl->rbuff[dlc_pos] >= '0') && (sl->rbuff[dlc_pos] < '9'))) + if (kstrtou32(sl->rbuff + SLC_CMD_LEN, 16, &tmpid)) return; - cf.can_dlc = sl->rbuff[dlc_pos] - '0'; /* get can_dlc from ASCII val */ + cf.can_id |= tmpid; - sl->rbuff[dlc_pos] = 0; /* terminate can_id string */ - - if (kstrtoul(sl->rbuff+1, 16, &ultmp)) + /* get can_dlc from sanitized ASCII value */ + if (cf.can_dlc >= '0' && cf.can_dlc < '9') + cf.can_dlc -= '0'; + else return; - cf.can_id = ultmp; - - if (!(cmd & 0x20)) /* NO tiny chars => extended frame format */ - cf.can_id |= CAN_EFF_FLAG; - - if ((cmd | 0x20) == 'r') /* RTR frame */ - cf.can_id |= CAN_RTR_FLAG; - *(u64 *) (&cf.data) = 0; /* clear payload */ - for (i = 0, dlc_pos++; i < cf.can_dlc; i++) { - tmp = hex_to_bin(sl->rbuff[dlc_pos++]); - if (tmp < 0) - return; - cf.data[i] = (tmp << 4); - tmp = hex_to_bin(sl->rbuff[dlc_pos++]); - if (tmp < 0) - return; - cf.data[i] |= tmp; + /* RTR frames may have a dlc > 0 but they never have any data bytes */ + if (!(cf.can_id & CAN_RTR_FLAG)) { + for (i = 0; i < cf.can_dlc; i++) { + tmp = hex_to_bin(*cmd++); + if (tmp < 0) + return; + cf.data[i] = (tmp << 4); + tmp = hex_to_bin(*cmd++); + if (tmp < 0) + return; + cf.data[i] |= tmp; + } } skb = dev_alloc_skb(sizeof(struct can_frame) + @@ -209,7 +229,6 @@ static void slc_bump(struct slcan *sl) /* parse tty input stream */ static void slcan_unesc(struct slcan *sl, unsigned char s) { - if ((s == '\r') || (s == '\a')) { /* CR or BEL ends the pdu */ if (!test_and_clear_bit(SLF_ERROR, &sl->flags) && (sl->rcount > 4)) { @@ -236,27 +255,46 @@ static void slcan_unesc(struct slcan *sl, unsigned char s) /* Encapsulate one can_frame and stuff into a TTY queue. */ static void slc_encaps(struct slcan *sl, struct can_frame *cf) { - int actual, idx, i; - char cmd; + int actual, i; + unsigned char *pos; + unsigned char *endpos; + canid_t id = cf->can_id; + + pos = sl->xbuff; if (cf->can_id & CAN_RTR_FLAG) - cmd = 'R'; /* becomes 'r' in standard frame format */ + *pos = 'R'; /* becomes 'r' in standard frame format (SFF) */ else - cmd = 'T'; /* becomes 't' in standard frame format */ + *pos = 'T'; /* becomes 't' in standard frame format (SSF) */ - if (cf->can_id & CAN_EFF_FLAG) - sprintf(sl->xbuff, "%c%08X%d", cmd, - cf->can_id & CAN_EFF_MASK, cf->can_dlc); - else - sprintf(sl->xbuff, "%c%03X%d", cmd | 0x20, - cf->can_id & CAN_SFF_MASK, cf->can_dlc); + /* determine number of chars for the CAN-identifier */ + if (cf->can_id & CAN_EFF_FLAG) { + id &= CAN_EFF_MASK; + endpos = pos + SLC_EFF_ID_LEN; + } else { + *pos |= 0x20; /* convert R/T to lower case for SFF */ + id &= CAN_SFF_MASK; + endpos = pos + SLC_SFF_ID_LEN; + } - idx = strlen(sl->xbuff); + /* build 3 (SFF) or 8 (EFF) digit CAN identifier */ + pos++; + while (endpos >= pos) { + *endpos-- = hex_asc_upper[id & 0xf]; + id >>= 4; + } + + pos += (cf->can_id & CAN_EFF_FLAG) ? SLC_EFF_ID_LEN : SLC_SFF_ID_LEN; - for (i = 0; i < cf->can_dlc; i++) - sprintf(&sl->xbuff[idx + 2*i], "%02X", cf->data[i]); + *pos++ = cf->can_dlc + '0'; + + /* RTR frames may have a dlc > 0 but they never have any data bytes */ + if (!(cf->can_id & CAN_RTR_FLAG)) { + for (i = 0; i < cf->can_dlc; i++) + pos = hex_byte_pack_upper(pos, cf->data[i]); + } - strcat(sl->xbuff, "\r"); /* add terminating character */ + *pos++ = '\r'; /* Order of next two lines is *very* important. * When we are sending a little amount of data, @@ -267,8 +305,8 @@ static void slc_encaps(struct slcan *sl, struct can_frame *cf) * 14 Oct 1994 Dmitry Gorodchanin. */ set_bit(TTY_DO_WRITE_WAKEUP, &sl->tty->flags); - actual = sl->tty->ops->write(sl->tty, sl->xbuff, strlen(sl->xbuff)); - sl->xleft = strlen(sl->xbuff) - actual; + actual = sl->tty->ops->write(sl->tty, sl->xbuff, pos - sl->xbuff); + sl->xleft = (pos - sl->xbuff) - actual; sl->xhead = sl->xbuff + actual; sl->dev->stats.tx_bytes += cf->can_dlc; } @@ -286,11 +324,13 @@ static void slcan_write_wakeup(struct tty_struct *tty) if (!sl || sl->magic != SLCAN_MAGIC || !netif_running(sl->dev)) return; + spin_lock(&sl->lock); if (sl->xleft <= 0) { /* Now serial buffer is almost free & we can start * transmission of another packet */ sl->dev->stats.tx_packets++; clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); + spin_unlock(&sl->lock); netif_wake_queue(sl->dev); return; } @@ -298,6 +338,7 @@ static void slcan_write_wakeup(struct tty_struct *tty) actual = tty->ops->write(tty, sl->xhead, sl->xleft); sl->xleft -= actual; sl->xhead += actual; + spin_unlock(&sl->lock); } /* Send a can_frame to a TTY queue. */ diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index a0f647f92bf5..0b7a4c3b01a2 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -463,7 +463,7 @@ static int peak_usb_start(struct peak_usb_device *dev) if (i < PCAN_USB_MAX_TX_URBS) { if (i == 0) { netdev_err(netdev, "couldn't setup any tx URB\n"); - return err; + goto err_tx; } netdev_warn(netdev, "tx performance may be slow\n"); @@ -472,7 +472,7 @@ static int peak_usb_start(struct peak_usb_device *dev) if (dev->adapter->dev_start) { err = dev->adapter->dev_start(dev); if (err) - goto failed; + goto err_adapter; } dev->state |= PCAN_USB_STATE_STARTED; @@ -481,19 +481,26 @@ static int peak_usb_start(struct peak_usb_device *dev) if (dev->adapter->dev_set_bus) { err = dev->adapter->dev_set_bus(dev, 1); if (err) - goto failed; + goto err_adapter; } dev->can.state = CAN_STATE_ERROR_ACTIVE; return 0; -failed: +err_adapter: if (err == -ENODEV) netif_device_detach(dev->netdev); netdev_warn(netdev, "couldn't submit control: %d\n", err); + for (i = 0; i < PCAN_USB_MAX_TX_URBS; i++) { + usb_free_urb(dev->tx_contexts[i].urb); + dev->tx_contexts[i].urb = NULL; + } +err_tx: + usb_kill_anchored_urbs(&dev->rx_submitted); + return err; } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 61726af1de6e..e66beff2704d 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2481,8 +2481,7 @@ load_error_cnic2: load_error_cnic1: bnx2x_napi_disable_cnic(bp); /* Update the number of queues without the cnic queues */ - rc = bnx2x_set_real_num_queues(bp, 0); - if (rc) + if (bnx2x_set_real_num_queues(bp, 0)) BNX2X_ERR("Unable to set real_num_queues not including cnic\n"); load_error_cnic0: BNX2X_ERR("CNIC-related load failed\n"); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index d60a2ea3da19..51468227bf3b 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -175,6 +175,7 @@ typedef int (*read_sfp_module_eeprom_func_p)(struct bnx2x_phy *phy, #define EDC_MODE_LINEAR 0x0022 #define EDC_MODE_LIMITING 0x0044 #define EDC_MODE_PASSIVE_DAC 0x0055 +#define EDC_MODE_ACTIVE_DAC 0x0066 /* ETS defines*/ #define DCBX_INVALID_COS (0xFF) @@ -3684,6 +3685,41 @@ static void bnx2x_warpcore_enable_AN_KR2(struct bnx2x_phy *phy, bnx2x_update_link_attr(params, vars->link_attr_sync); } +static void bnx2x_disable_kr2(struct link_params *params, + struct link_vars *vars, + struct bnx2x_phy *phy) +{ + struct bnx2x *bp = params->bp; + int i; + static struct bnx2x_reg_set reg_set[] = { + /* Step 1 - Program the TX/RX alignment markers */ + {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL5, 0x7690}, + {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL7, 0xe647}, + {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL6, 0xc4f0}, + {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL9, 0x7690}, + {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_RX_CTRL11, 0xe647}, + {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_RX_CTRL10, 0xc4f0}, + {MDIO_WC_DEVAD, MDIO_WC_REG_CL73_USERB0_CTRL, 0x000c}, + {MDIO_WC_DEVAD, MDIO_WC_REG_CL73_BAM_CTRL1, 0x6000}, + {MDIO_WC_DEVAD, MDIO_WC_REG_CL73_BAM_CTRL3, 0x0000}, + {MDIO_WC_DEVAD, MDIO_WC_REG_CL73_BAM_CODE_FIELD, 0x0002}, + {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_OUI1, 0x0000}, + {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_OUI2, 0x0af7}, + {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_OUI3, 0x0af7}, + {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_LD_BAM_CODE, 0x0002}, + {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_LD_UD_CODE, 0x0000} + }; + DP(NETIF_MSG_LINK, "Disabling 20G-KR2\n"); + + for (i = 0; i < ARRAY_SIZE(reg_set); i++) + bnx2x_cl45_write(bp, phy, reg_set[i].devad, reg_set[i].reg, + reg_set[i].val); + vars->link_attr_sync &= ~LINK_ATTR_SYNC_KR2_ENABLE; + bnx2x_update_link_attr(params, vars->link_attr_sync); + + vars->check_kr2_recovery_cnt = CHECK_KR2_RECOVERY_CNT; +} + static void bnx2x_warpcore_set_lpi_passthrough(struct bnx2x_phy *phy, struct link_params *params) { @@ -3715,7 +3751,6 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy, struct link_params *params, struct link_vars *vars) { u16 lane, i, cl72_ctrl, an_adv = 0; - u16 ucode_ver; struct bnx2x *bp = params->bp; static struct bnx2x_reg_set reg_set[] = { {MDIO_WC_DEVAD, MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X2, 0x7}, @@ -3806,15 +3841,7 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy, /* Advertise pause */ bnx2x_ext_phy_set_pause(params, phy, vars); - /* Set KR Autoneg Work-Around flag for Warpcore version older than D108 - */ - bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD, - MDIO_WC_REG_UC_INFO_B1_VERSION, &ucode_ver); - if (ucode_ver < 0xd108) { - DP(NETIF_MSG_LINK, "Enable AN KR work-around. WC ver:0x%x\n", - ucode_ver); - vars->rx_tx_asic_rst = MAX_KR_LINK_RETRY; - } + vars->rx_tx_asic_rst = MAX_KR_LINK_RETRY; bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD, MDIO_WC_REG_DIGITAL5_MISC7, 0x100); @@ -3838,6 +3865,8 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy, bnx2x_set_aer_mmd(params, phy); bnx2x_warpcore_enable_AN_KR2(phy, params, vars); + } else { + bnx2x_disable_kr2(params, vars, phy); } /* Enable Autoneg: only on the main lane */ @@ -4347,20 +4376,14 @@ static void bnx2x_warpcore_config_runtime(struct bnx2x_phy *phy, struct bnx2x *bp = params->bp; u32 serdes_net_if; u16 gp_status1 = 0, lnkup = 0, lnkup_kr = 0; - u16 lane = bnx2x_get_warpcore_lane(phy, params); vars->turn_to_run_wc_rt = vars->turn_to_run_wc_rt ? 0 : 1; if (!vars->turn_to_run_wc_rt) return; - /* Return if there is no link partner */ - if (!(bnx2x_warpcore_get_sigdet(phy, params))) { - DP(NETIF_MSG_LINK, "bnx2x_warpcore_get_sigdet false\n"); - return; - } - if (vars->rx_tx_asic_rst) { + u16 lane = bnx2x_get_warpcore_lane(phy, params); serdes_net_if = (REG_RD(bp, params->shmem_base + offsetof(struct shmem_region, dev_info. port_hw_config[params->port].default_cfg)) & @@ -4375,14 +4398,8 @@ static void bnx2x_warpcore_config_runtime(struct bnx2x_phy *phy, /*10G KR*/ lnkup_kr = (gp_status1 >> (12+lane)) & 0x1; - DP(NETIF_MSG_LINK, - "gp_status1 0x%x\n", gp_status1); - if (lnkup_kr || lnkup) { - vars->rx_tx_asic_rst = 0; - DP(NETIF_MSG_LINK, - "link up, rx_tx_asic_rst 0x%x\n", - vars->rx_tx_asic_rst); + vars->rx_tx_asic_rst = 0; } else { /* Reset the lane to see if link comes up.*/ bnx2x_warpcore_reset_lane(bp, phy, 1); @@ -4507,10 +4524,14 @@ static void bnx2x_warpcore_config_init(struct bnx2x_phy *phy, * enabled transmitter to avoid current leakage in case * no module is connected */ - if (bnx2x_is_sfp_module_plugged(phy, params)) - bnx2x_sfp_module_detection(phy, params); - else - bnx2x_sfp_e3_set_transmitter(params, phy, 1); + if ((params->loopback_mode == LOOPBACK_NONE) || + (params->loopback_mode == LOOPBACK_EXT)) { + if (bnx2x_is_sfp_module_plugged(phy, params)) + bnx2x_sfp_module_detection(phy, params); + else + bnx2x_sfp_e3_set_transmitter(params, + phy, 1); + } bnx2x_warpcore_config_sfi(phy, params); break; @@ -5757,6 +5778,11 @@ static int bnx2x_warpcore_read_status(struct bnx2x_phy *phy, rc = bnx2x_get_link_speed_duplex(phy, params, vars, link_up, gp_speed, duplex); + /* In case of KR link down, start up the recovering procedure */ + if ((!link_up) && (phy->media_type == ETH_PHY_KR) && + (!(phy->flags & FLAGS_WC_DUAL_MODE))) + vars->rx_tx_asic_rst = MAX_KR_LINK_RETRY; + DP(NETIF_MSG_LINK, "duplex %x flow_ctrl 0x%x link_status 0x%x\n", vars->duplex, vars->flow_ctrl, vars->link_status); return rc; @@ -6507,6 +6533,11 @@ static int bnx2x_link_initialize(struct link_params *params, params->phy[INT_PHY].config_init(phy, params, vars); } + /* Re-read this value in case it was changed inside config_init due to + * limitations of optic module + */ + vars->line_speed = params->phy[INT_PHY].req_line_speed; + /* Init external phy*/ if (non_ext_phy) { if (params->phy[INT_PHY].supported & @@ -8080,7 +8111,10 @@ static int bnx2x_get_edc_mode(struct bnx2x_phy *phy, if (copper_module_type & SFP_EEPROM_FC_TX_TECH_BITMASK_COPPER_ACTIVE) { DP(NETIF_MSG_LINK, "Active Copper cable detected\n"); - check_limiting_mode = 1; + if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT) + *edc_mode = EDC_MODE_ACTIVE_DAC; + else + check_limiting_mode = 1; } else if (copper_module_type & SFP_EEPROM_FC_TX_TECH_BITMASK_COPPER_PASSIVE) { DP(NETIF_MSG_LINK, @@ -8555,6 +8589,7 @@ static void bnx2x_warpcore_set_limiting_mode(struct link_params *params, mode = MDIO_WC_REG_UC_INFO_B1_FIRMWARE_MODE_DEFAULT; break; case EDC_MODE_PASSIVE_DAC: + case EDC_MODE_ACTIVE_DAC: mode = MDIO_WC_REG_UC_INFO_B1_FIRMWARE_MODE_SFP_DAC; break; default: @@ -9730,32 +9765,41 @@ static int bnx2x_848xx_cmn_config_init(struct bnx2x_phy *phy, MDIO_AN_DEVAD, MDIO_AN_REG_8481_1000T_CTRL, an_1000_val); - /* set 100 speed advertisement */ - if ((phy->req_line_speed == SPEED_AUTO_NEG) && - (phy->speed_cap_mask & - (PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_FULL | - PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_HALF))) { - an_10_100_val |= (1<<7); - /* Enable autoneg and restart autoneg for legacy speeds */ - autoneg_val |= (1<<9 | 1<<12); - - if (phy->req_duplex == DUPLEX_FULL) + /* Set 10/100 speed advertisement */ + if (phy->req_line_speed == SPEED_AUTO_NEG) { + if (phy->speed_cap_mask & + PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_FULL) { + /* Enable autoneg and restart autoneg for legacy speeds + */ + autoneg_val |= (1<<9 | 1<<12); an_10_100_val |= (1<<8); - DP(NETIF_MSG_LINK, "Advertising 100M\n"); - } - /* set 10 speed advertisement */ - if (((phy->req_line_speed == SPEED_AUTO_NEG) && - (phy->speed_cap_mask & - (PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_FULL | - PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_HALF)) && - (phy->supported & - (SUPPORTED_10baseT_Half | - SUPPORTED_10baseT_Full)))) { - an_10_100_val |= (1<<5); - autoneg_val |= (1<<9 | 1<<12); - if (phy->req_duplex == DUPLEX_FULL) + DP(NETIF_MSG_LINK, "Advertising 100M-FD\n"); + } + + if (phy->speed_cap_mask & + PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_HALF) { + /* Enable autoneg and restart autoneg for legacy speeds + */ + autoneg_val |= (1<<9 | 1<<12); + an_10_100_val |= (1<<7); + DP(NETIF_MSG_LINK, "Advertising 100M-HD\n"); + } + + if ((phy->speed_cap_mask & + PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_FULL) && + (phy->supported & SUPPORTED_10baseT_Full)) { an_10_100_val |= (1<<6); - DP(NETIF_MSG_LINK, "Advertising 10M\n"); + autoneg_val |= (1<<9 | 1<<12); + DP(NETIF_MSG_LINK, "Advertising 10M-FD\n"); + } + + if ((phy->speed_cap_mask & + PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_HALF) && + (phy->supported & SUPPORTED_10baseT_Half)) { + an_10_100_val |= (1<<5); + autoneg_val |= (1<<9 | 1<<12); + DP(NETIF_MSG_LINK, "Advertising 10M-HD\n"); + } } /* Only 10/100 are allowed to work in FORCE mode */ @@ -13432,43 +13476,6 @@ static void bnx2x_sfp_tx_fault_detection(struct bnx2x_phy *phy, } } } -static void bnx2x_disable_kr2(struct link_params *params, - struct link_vars *vars, - struct bnx2x_phy *phy) -{ - struct bnx2x *bp = params->bp; - int i; - static struct bnx2x_reg_set reg_set[] = { - /* Step 1 - Program the TX/RX alignment markers */ - {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL5, 0x7690}, - {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL7, 0xe647}, - {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL6, 0xc4f0}, - {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL9, 0x7690}, - {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_RX_CTRL11, 0xe647}, - {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_RX_CTRL10, 0xc4f0}, - {MDIO_WC_DEVAD, MDIO_WC_REG_CL73_USERB0_CTRL, 0x000c}, - {MDIO_WC_DEVAD, MDIO_WC_REG_CL73_BAM_CTRL1, 0x6000}, - {MDIO_WC_DEVAD, MDIO_WC_REG_CL73_BAM_CTRL3, 0x0000}, - {MDIO_WC_DEVAD, MDIO_WC_REG_CL73_BAM_CODE_FIELD, 0x0002}, - {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_OUI1, 0x0000}, - {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_OUI2, 0x0af7}, - {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_OUI3, 0x0af7}, - {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_LD_BAM_CODE, 0x0002}, - {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_LD_UD_CODE, 0x0000} - }; - DP(NETIF_MSG_LINK, "Disabling 20G-KR2\n"); - - for (i = 0; i < ARRAY_SIZE(reg_set); i++) - bnx2x_cl45_write(bp, phy, reg_set[i].devad, reg_set[i].reg, - reg_set[i].val); - vars->link_attr_sync &= ~LINK_ATTR_SYNC_KR2_ENABLE; - bnx2x_update_link_attr(params, vars->link_attr_sync); - - vars->check_kr2_recovery_cnt = CHECK_KR2_RECOVERY_CNT; - /* Restart AN on leading lane */ - bnx2x_warpcore_restart_AN_KR(phy, params); -} - static void bnx2x_kr2_recovery(struct link_params *params, struct link_vars *vars, struct bnx2x_phy *phy) @@ -13546,6 +13553,8 @@ static void bnx2x_check_kr2_wa(struct link_params *params, /* Disable KR2 on both lanes */ DP(NETIF_MSG_LINK, "BP=0x%x, NP=0x%x\n", base_page, next_page); bnx2x_disable_kr2(params, vars, phy); + /* Restart AN on leading lane */ + bnx2x_warpcore_restart_AN_KR(phy, params); return; } } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index a6704b555042..82b658d8c04c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -4703,6 +4703,14 @@ bool bnx2x_chk_parity_attn(struct bnx2x *bp, bool *global, bool print) attn.sig[3] = REG_RD(bp, MISC_REG_AEU_AFTER_INVERT_4_FUNC_0 + port*4); + /* Since MCP attentions can't be disabled inside the block, we need to + * read AEU registers to see whether they're currently disabled + */ + attn.sig[3] &= ((REG_RD(bp, + !port ? MISC_REG_AEU_ENABLE4_FUNC_0_OUT_0 + : MISC_REG_AEU_ENABLE4_FUNC_1_OUT_0) & + MISC_AEU_ENABLE_MCP_PRTY_BITS) | + ~MISC_AEU_ENABLE_MCP_PRTY_BITS); if (!CHIP_IS_E1x(bp)) attn.sig[4] = REG_RD(bp, @@ -5447,26 +5455,24 @@ static void bnx2x_timer(unsigned long data) if (IS_PF(bp) && !BP_NOMCP(bp)) { int mb_idx = BP_FW_MB_IDX(bp); - u32 drv_pulse; - u32 mcp_pulse; + u16 drv_pulse; + u16 mcp_pulse; ++bp->fw_drv_pulse_wr_seq; bp->fw_drv_pulse_wr_seq &= DRV_PULSE_SEQ_MASK; - /* TBD - add SYSTEM_TIME */ drv_pulse = bp->fw_drv_pulse_wr_seq; bnx2x_drv_pulse(bp); mcp_pulse = (SHMEM_RD(bp, func_mb[mb_idx].mcp_pulse_mb) & MCP_PULSE_SEQ_MASK); /* The delta between driver pulse and mcp response - * should be 1 (before mcp response) or 0 (after mcp response) + * should not get too big. If the MFW is more than 5 pulses + * behind, we should worry about it enough to generate an error + * log. */ - if ((drv_pulse != mcp_pulse) && - (drv_pulse != ((mcp_pulse + 1) & MCP_PULSE_SEQ_MASK))) { - /* someone lost a heartbeat... */ - BNX2X_ERR("drv_pulse (0x%x) != mcp_pulse (0x%x)\n", + if (((drv_pulse - mcp_pulse) & MCP_PULSE_SEQ_MASK) > 5) + BNX2X_ERR("MFW seems hanged: drv_pulse (0x%x) != mcp_pulse (0x%x)\n", drv_pulse, mcp_pulse); - } } if (bp->state == BNX2X_STATE_OPEN) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 2604b6204abe..9ad012bdd915 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -1819,7 +1819,7 @@ bnx2x_get_vf_igu_cam_info(struct bnx2x *bp) fid = GET_FIELD((val), IGU_REG_MAPPING_MEMORY_FID); if (fid & IGU_FID_ENCODE_IS_PF) current_pf = fid & IGU_FID_PF_NUM_MASK; - else if (current_pf == BP_ABS_FUNC(bp)) + else if (current_pf == BP_FUNC(bp)) bnx2x_vf_set_igu_info(bp, sb_id, (fid & IGU_FID_VF_NUM_MASK)); DP(BNX2X_MSG_IOV, "%s[%d], igu_sb_id=%d, msix=%d\n", @@ -3180,6 +3180,7 @@ int bnx2x_enable_sriov(struct bnx2x *bp) /* set local queue arrays */ vf->vfqs = &bp->vfdb->vfqs[qcount]; qcount += vf_sb_count(vf); + bnx2x_iov_static_resc(bp, vf); } /* prepare msix vectors in VF configuration space */ @@ -3187,6 +3188,8 @@ int bnx2x_enable_sriov(struct bnx2x *bp) bnx2x_pretend_func(bp, HW_VF_HANDLE(bp, vf_idx)); REG_WR(bp, PCICFG_OFFSET + GRC_CONFIG_REG_VF_MSIX_CONTROL, num_vf_queues); + DP(BNX2X_MSG_IOV, "set msix vec num in VF %d cfg space to %d\n", + vf_idx, num_vf_queues); } bnx2x_pretend_func(bp, BP_ABS_FUNC(bp)); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index 6cfb88732452..da16953eb2ec 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -1765,28 +1765,28 @@ static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf, switch (mbx->first_tlv.tl.type) { case CHANNEL_TLV_ACQUIRE: bnx2x_vf_mbx_acquire(bp, vf, mbx); - break; + return; case CHANNEL_TLV_INIT: bnx2x_vf_mbx_init_vf(bp, vf, mbx); - break; + return; case CHANNEL_TLV_SETUP_Q: bnx2x_vf_mbx_setup_q(bp, vf, mbx); - break; + return; case CHANNEL_TLV_SET_Q_FILTERS: bnx2x_vf_mbx_set_q_filters(bp, vf, mbx); - break; + return; case CHANNEL_TLV_TEARDOWN_Q: bnx2x_vf_mbx_teardown_q(bp, vf, mbx); - break; + return; case CHANNEL_TLV_CLOSE: bnx2x_vf_mbx_close_vf(bp, vf, mbx); - break; + return; case CHANNEL_TLV_RELEASE: bnx2x_vf_mbx_release_vf(bp, vf, mbx); - break; + return; case CHANNEL_TLV_UPDATE_RSS: bnx2x_vf_mbx_update_rss(bp, vf, mbx); - break; + return; } } else { @@ -1802,26 +1802,24 @@ static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf, for (i = 0; i < 20; i++) DP_CONT(BNX2X_MSG_IOV, "%x ", mbx->msg->req.tlv_buf_size.tlv_buffer[i]); + } - /* test whether we can respond to the VF (do we have an address - * for it?) - */ - if (vf->state == VF_ACQUIRED || vf->state == VF_ENABLED) { - /* mbx_resp uses the op_rc of the VF */ - vf->op_rc = PFVF_STATUS_NOT_SUPPORTED; + /* can we respond to VF (do we have an address for it?) */ + if (vf->state == VF_ACQUIRED || vf->state == VF_ENABLED) { + /* mbx_resp uses the op_rc of the VF */ + vf->op_rc = PFVF_STATUS_NOT_SUPPORTED; - /* notify the VF that we do not support this request */ - bnx2x_vf_mbx_resp(bp, vf); - } else { - /* can't send a response since this VF is unknown to us - * just ack the FW to release the mailbox and unlock - * the channel. - */ - storm_memset_vf_mbx_ack(bp, vf->abs_vfid); - mmiowb(); - bnx2x_unlock_vf_pf_channel(bp, vf, - mbx->first_tlv.tl.type); - } + /* notify the VF that we do not support this request */ + bnx2x_vf_mbx_resp(bp, vf); + } else { + /* can't send a response since this VF is unknown to us + * just ack the FW to release the mailbox and unlock + * the channel. + */ + storm_memset_vf_mbx_ack(bp, vf->abs_vfid); + /* Firmware ack should be written before unlocking channel */ + mmiowb(); + bnx2x_unlock_vf_pf_channel(bp, vf, mbx->first_tlv.tl.type); } } diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index ace5050dba38..db020230bd0b 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -88,6 +88,7 @@ static inline char *nic_name(struct pci_dev *pdev) #define BE_MIN_MTU 256 #define BE_NUM_VLANS_SUPPORTED 64 +#define BE_UMC_NUM_VLANS_SUPPORTED 15 #define BE_MAX_EQD 96u #define BE_MAX_TX_FRAG_COUNT 30 @@ -333,6 +334,7 @@ enum vf_state { #define BE_FLAGS_LINK_STATUS_INIT 1 #define BE_FLAGS_WORKER_SCHEDULED (1 << 3) +#define BE_FLAGS_VLAN_PROMISC (1 << 4) #define BE_FLAGS_NAPI_ENABLED (1 << 9) #define BE_UC_PMAC_COUNT 30 #define BE_VF_UC_PMAC_COUNT 2 diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 1ab5dab11eff..bd0e0c0bbcd8 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -180,6 +180,9 @@ static int be_mcc_compl_process(struct be_adapter *adapter, dev_err(&adapter->pdev->dev, "opcode %d-%d failed:status %d-%d\n", opcode, subsystem, compl_status, extd_status); + + if (extd_status == MCC_ADDL_STS_INSUFFICIENT_RESOURCES) + return extd_status; } } done: @@ -1812,6 +1815,12 @@ int be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value) } else if (flags & IFF_ALLMULTI) { req->if_flags_mask = req->if_flags = cpu_to_le32(BE_IF_FLAGS_MCAST_PROMISCUOUS); + } else if (flags & BE_FLAGS_VLAN_PROMISC) { + req->if_flags_mask = cpu_to_le32(BE_IF_FLAGS_VLAN_PROMISCUOUS); + + if (value == ON) + req->if_flags = + cpu_to_le32(BE_IF_FLAGS_VLAN_PROMISCUOUS); } else { struct netdev_hw_addr *ha; int i = 0; diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index d026226db88c..108ca8abf0af 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -60,6 +60,8 @@ enum { MCC_STATUS_NOT_SUPPORTED = 66 }; +#define MCC_ADDL_STS_INSUFFICIENT_RESOURCES 0x16 + #define CQE_STATUS_COMPL_MASK 0xFFFF #define CQE_STATUS_COMPL_SHIFT 0 /* bits 0 - 15 */ #define CQE_STATUS_EXTD_MASK 0xFFFF @@ -1791,7 +1793,7 @@ struct be_nic_res_desc { u8 acpi_params; u8 wol_param; u16 rsvd7; - u32 rsvd8[3]; + u32 rsvd8[7]; } __packed; struct be_cmd_req_get_func_config { diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 100b528b9bd0..2c38cc402119 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -855,11 +855,11 @@ static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter, unsigned int eth_hdr_len; struct iphdr *ip; - /* Lancer ASIC has a bug wherein packets that are 32 bytes or less + /* Lancer, SH-R ASICs have a bug wherein Packets that are 32 bytes or less * may cause a transmit stall on that port. So the work-around is to - * pad such packets to a 36-byte length. + * pad short packets (<= 32 bytes) to a 36-byte length. */ - if (unlikely(lancer_chip(adapter) && skb->len <= 32)) { + if (unlikely(!BEx_chip(adapter) && skb->len <= 32)) { if (skb_padto(skb, 36)) goto tx_drop; skb->len = 36; @@ -1013,18 +1013,40 @@ static int be_vid_config(struct be_adapter *adapter) status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 1, 0); - /* Set to VLAN promisc mode as setting VLAN filter failed */ if (status) { - dev_info(&adapter->pdev->dev, "Exhausted VLAN HW filters.\n"); - dev_info(&adapter->pdev->dev, "Disabling HW VLAN filtering.\n"); - goto set_vlan_promisc; + /* Set to VLAN promisc mode as setting VLAN filter failed */ + if (status == MCC_ADDL_STS_INSUFFICIENT_RESOURCES) + goto set_vlan_promisc; + dev_err(&adapter->pdev->dev, + "Setting HW VLAN filtering failed.\n"); + } else { + if (adapter->flags & BE_FLAGS_VLAN_PROMISC) { + /* hw VLAN filtering re-enabled. */ + status = be_cmd_rx_filter(adapter, + BE_FLAGS_VLAN_PROMISC, OFF); + if (!status) { + dev_info(&adapter->pdev->dev, + "Disabling VLAN Promiscuous mode.\n"); + adapter->flags &= ~BE_FLAGS_VLAN_PROMISC; + dev_info(&adapter->pdev->dev, + "Re-Enabling HW VLAN filtering\n"); + } + } } return status; set_vlan_promisc: - status = be_cmd_vlan_config(adapter, adapter->if_handle, - NULL, 0, 1, 1); + dev_warn(&adapter->pdev->dev, "Exhausted VLAN HW filters.\n"); + + status = be_cmd_rx_filter(adapter, BE_FLAGS_VLAN_PROMISC, ON); + if (!status) { + dev_info(&adapter->pdev->dev, "Enable VLAN Promiscuous mode\n"); + dev_info(&adapter->pdev->dev, "Disabling HW VLAN filtering\n"); + adapter->flags |= BE_FLAGS_VLAN_PROMISC; + } else + dev_err(&adapter->pdev->dev, + "Failed to enable VLAN Promiscuous mode.\n"); return status; } @@ -1033,10 +1055,6 @@ static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid) struct be_adapter *adapter = netdev_priv(netdev); int status = 0; - if (!lancer_chip(adapter) && !be_physfn(adapter)) { - status = -EINVAL; - goto ret; - } /* Packets with VID 0 are always received by Lancer by default */ if (lancer_chip(adapter) && vid == 0) @@ -1059,11 +1077,6 @@ static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid) struct be_adapter *adapter = netdev_priv(netdev); int status = 0; - if (!lancer_chip(adapter) && !be_physfn(adapter)) { - status = -EINVAL; - goto ret; - } - /* Packets with VID 0 are always received by Lancer by default */ if (lancer_chip(adapter) && vid == 0) goto ret; @@ -1188,8 +1201,8 @@ static int be_get_vf_config(struct net_device *netdev, int vf, vi->vf = vf; vi->tx_rate = vf_cfg->tx_rate; - vi->vlan = vf_cfg->vlan_tag; - vi->qos = 0; + vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK; + vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT; memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN); return 0; @@ -1199,28 +1212,29 @@ static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) { struct be_adapter *adapter = netdev_priv(netdev); + struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf]; int status = 0; if (!sriov_enabled(adapter)) return -EPERM; - if (vf >= adapter->num_vfs || vlan > 4095) + if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7) return -EINVAL; - if (vlan) { - if (adapter->vf_cfg[vf].vlan_tag != vlan) { + if (vlan || qos) { + vlan |= qos << VLAN_PRIO_SHIFT; + if (vf_cfg->vlan_tag != vlan) { /* If this is new value, program it. Else skip. */ - adapter->vf_cfg[vf].vlan_tag = vlan; - - status = be_cmd_set_hsw_config(adapter, vlan, - vf + 1, adapter->vf_cfg[vf].if_handle, 0); + vf_cfg->vlan_tag = vlan; + status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, + vf_cfg->if_handle, 0); } } else { /* Reset Transparent Vlan Tagging. */ - adapter->vf_cfg[vf].vlan_tag = 0; - vlan = adapter->vf_cfg[vf].def_vid; + vf_cfg->vlan_tag = 0; + vlan = vf_cfg->def_vid; status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, - adapter->vf_cfg[vf].if_handle, 0); + vf_cfg->if_handle, 0); } @@ -2963,6 +2977,8 @@ static void BEx_get_resources(struct be_adapter *adapter, if (adapter->function_mode & FLEX10_MODE) res->max_vlans = BE_NUM_VLANS_SUPPORTED/8; + else if (adapter->function_mode & UMC_ENABLED) + res->max_vlans = BE_UMC_NUM_VLANS_SUPPORTED; else res->max_vlans = BE_NUM_VLANS_SUPPORTED; res->max_mcast_mac = BE_MAX_MC; diff --git a/drivers/net/ethernet/freescale/gianfar_ptp.c b/drivers/net/ethernet/freescale/gianfar_ptp.c index 098f133908ae..e006a09ba899 100644 --- a/drivers/net/ethernet/freescale/gianfar_ptp.c +++ b/drivers/net/ethernet/freescale/gianfar_ptp.c @@ -452,7 +452,9 @@ static int gianfar_ptp_probe(struct platform_device *dev) err = -ENODEV; etsects->caps = ptp_gianfar_caps; - etsects->cksel = DEFAULT_CKSEL; + + if (get_of_u32(node, "fsl,cksel", &etsects->cksel)) + etsects->cksel = DEFAULT_CKSEL; if (get_of_u32(node, "fsl,tclk-period", &etsects->tclk_period) || get_of_u32(node, "fsl,tmr-prsc", &etsects->tmr_prsc) || diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 0c524fa9f811..cfef7fc32cdd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -701,8 +701,7 @@ i40e_status i40e_asq_send_command(struct i40e_hw *hw, details = I40E_ADMINQ_DETAILS(hw->aq.asq, hw->aq.asq.next_to_use); if (cmd_details) { - memcpy(details, cmd_details, - sizeof(struct i40e_asq_cmd_details)); + *details = *cmd_details; /* If the cmd_details are defined copy the cookie. The * cpu_to_le32 is not needed here because the data is ignored @@ -760,7 +759,7 @@ i40e_status i40e_asq_send_command(struct i40e_hw *hw, desc_on_ring = I40E_ADMINQ_DESC(hw->aq.asq, hw->aq.asq.next_to_use); /* if the desc is available copy the temp desc to the right place */ - memcpy(desc_on_ring, desc, sizeof(struct i40e_aq_desc)); + *desc_on_ring = *desc; /* if buff is not NULL assume indirect command */ if (buff != NULL) { @@ -807,7 +806,7 @@ i40e_status i40e_asq_send_command(struct i40e_hw *hw, /* if ready, copy the desc back to temp */ if (i40e_asq_done(hw)) { - memcpy(desc, desc_on_ring, sizeof(struct i40e_aq_desc)); + *desc = *desc_on_ring; if (buff != NULL) memcpy(buff, dma_buff->va, buff_size); retval = le16_to_cpu(desc->retval); diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index c21df7bc3b1d..1e4ea134975a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -507,7 +507,7 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw, /* save link status information */ if (link) - memcpy(link, hw_link_info, sizeof(struct i40e_link_status)); + *link = *hw_link_info; /* flag cleared so helper functions don't call AQ again */ hw->phy.get_link_info = false; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 601d482694ea..221aa4795017 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -101,10 +101,10 @@ int i40e_allocate_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem, mem->size = ALIGN(size, alignment); mem->va = dma_zalloc_coherent(&pf->pdev->dev, mem->size, &mem->pa, GFP_KERNEL); - if (mem->va) - return 0; + if (!mem->va) + return -ENOMEM; - return -ENOMEM; + return 0; } /** @@ -136,10 +136,10 @@ int i40e_allocate_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem, mem->size = size; mem->va = kzalloc(size, GFP_KERNEL); - if (mem->va) - return 0; + if (!mem->va) + return -ENOMEM; - return -ENOMEM; + return 0; } /** @@ -174,8 +174,7 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile, u16 needed, u16 id) { int ret = -ENOMEM; - int i = 0; - int j = 0; + int i, j; if (!pile || needed == 0 || id >= I40E_PILE_VALID_BIT) { dev_info(&pf->pdev->dev, @@ -186,7 +185,7 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile, /* start the linear search with an imperfect hint */ i = pile->search_hint; - while (i < pile->num_entries && ret < 0) { + while (i < pile->num_entries) { /* skip already allocated entries */ if (pile->list[i] & I40E_PILE_VALID_BIT) { i++; @@ -205,6 +204,7 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile, pile->list[i+j] = id | I40E_PILE_VALID_BIT; ret = i; pile->search_hint = i + j; + break; } else { /* not enough, so skip over it and continue looking */ i += j; @@ -1388,7 +1388,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) bool add_happened = false; int filter_list_len = 0; u32 changed_flags = 0; - i40e_status ret = 0; + i40e_status aq_ret = 0; struct i40e_pf *pf; int num_add = 0; int num_del = 0; @@ -1449,28 +1449,28 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) /* flush a full buffer */ if (num_del == filter_list_len) { - ret = i40e_aq_remove_macvlan(&pf->hw, + aq_ret = i40e_aq_remove_macvlan(&pf->hw, vsi->seid, del_list, num_del, NULL); num_del = 0; memset(del_list, 0, sizeof(*del_list)); - if (ret) + if (aq_ret) dev_info(&pf->pdev->dev, "ignoring delete macvlan error, err %d, aq_err %d while flushing a full buffer\n", - ret, + aq_ret, pf->hw.aq.asq_last_status); } } if (num_del) { - ret = i40e_aq_remove_macvlan(&pf->hw, vsi->seid, + aq_ret = i40e_aq_remove_macvlan(&pf->hw, vsi->seid, del_list, num_del, NULL); num_del = 0; - if (ret) + if (aq_ret) dev_info(&pf->pdev->dev, "ignoring delete macvlan error, err %d, aq_err %d\n", - ret, pf->hw.aq.asq_last_status); + aq_ret, pf->hw.aq.asq_last_status); } kfree(del_list); @@ -1515,32 +1515,30 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) /* flush a full buffer */ if (num_add == filter_list_len) { - ret = i40e_aq_add_macvlan(&pf->hw, - vsi->seid, - add_list, - num_add, - NULL); + aq_ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid, + add_list, num_add, + NULL); num_add = 0; - if (ret) + if (aq_ret) break; memset(add_list, 0, sizeof(*add_list)); } } if (num_add) { - ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid, - add_list, num_add, NULL); + aq_ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid, + add_list, num_add, NULL); num_add = 0; } kfree(add_list); add_list = NULL; - if (add_happened && (!ret)) { + if (add_happened && (!aq_ret)) { /* do nothing */; - } else if (add_happened && (ret)) { + } else if (add_happened && (aq_ret)) { dev_info(&pf->pdev->dev, "add filter failed, err %d, aq_err %d\n", - ret, pf->hw.aq.asq_last_status); + aq_ret, pf->hw.aq.asq_last_status); if ((pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOSPC) && !test_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state)) { @@ -1556,28 +1554,27 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) if (changed_flags & IFF_ALLMULTI) { bool cur_multipromisc; cur_multipromisc = !!(vsi->current_netdev_flags & IFF_ALLMULTI); - ret = i40e_aq_set_vsi_multicast_promiscuous(&vsi->back->hw, - vsi->seid, - cur_multipromisc, - NULL); - if (ret) + aq_ret = i40e_aq_set_vsi_multicast_promiscuous(&vsi->back->hw, + vsi->seid, + cur_multipromisc, + NULL); + if (aq_ret) dev_info(&pf->pdev->dev, "set multi promisc failed, err %d, aq_err %d\n", - ret, pf->hw.aq.asq_last_status); + aq_ret, pf->hw.aq.asq_last_status); } if ((changed_flags & IFF_PROMISC) || promisc_forced_on) { bool cur_promisc; cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) || test_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state)); - ret = i40e_aq_set_vsi_unicast_promiscuous(&vsi->back->hw, - vsi->seid, - cur_promisc, - NULL); - if (ret) + aq_ret = i40e_aq_set_vsi_unicast_promiscuous(&vsi->back->hw, + vsi->seid, + cur_promisc, NULL); + if (aq_ret) dev_info(&pf->pdev->dev, "set uni promisc failed, err %d, aq_err %d\n", - ret, pf->hw.aq.asq_last_status); + aq_ret, pf->hw.aq.asq_last_status); } clear_bit(__I40E_CONFIG_BUSY, &vsi->state); @@ -1790,6 +1787,8 @@ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid) * i40e_vsi_kill_vlan - Remove vsi membership for given vlan * @vsi: the vsi being configured * @vid: vlan id to be removed (0 = untagged only , -1 = any) + * + * Return: 0 on success or negative otherwise **/ int i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid) { @@ -1863,37 +1862,39 @@ int i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid) * i40e_vlan_rx_add_vid - Add a vlan id filter to HW offload * @netdev: network interface to be adjusted * @vid: vlan id to be added + * + * net_device_ops implementation for adding vlan ids **/ static int i40e_vlan_rx_add_vid(struct net_device *netdev, __always_unused __be16 proto, u16 vid) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; - int ret; + int ret = 0; if (vid > 4095) - return 0; + return -EINVAL; + + netdev_info(netdev, "adding %pM vid=%d\n", netdev->dev_addr, vid); - netdev_info(vsi->netdev, "adding %pM vid=%d\n", - netdev->dev_addr, vid); /* If the network stack called us with vid = 0, we should * indicate to i40e_vsi_add_vlan() that we want to receive * any traffic (i.e. with any vlan tag, or untagged) */ ret = i40e_vsi_add_vlan(vsi, vid ? vid : I40E_VLAN_ANY); - if (!ret) { - if (vid < VLAN_N_VID) - set_bit(vid, vsi->active_vlans); - } + if (!ret && (vid < VLAN_N_VID)) + set_bit(vid, vsi->active_vlans); - return 0; + return ret; } /** * i40e_vlan_rx_kill_vid - Remove a vlan id filter from HW offload * @netdev: network interface to be adjusted * @vid: vlan id to be removed + * + * net_device_ops implementation for adding vlan ids **/ static int i40e_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto, u16 vid) @@ -1901,15 +1902,16 @@ static int i40e_vlan_rx_kill_vid(struct net_device *netdev, struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; - netdev_info(vsi->netdev, "removing %pM vid=%d\n", - netdev->dev_addr, vid); + netdev_info(netdev, "removing %pM vid=%d\n", netdev->dev_addr, vid); + /* return code is ignored as there is nothing a user * can do about failure to remove and a log message was - * already printed from another function + * already printed from the other function */ i40e_vsi_kill_vlan(vsi, vid); clear_bit(vid, vsi->active_vlans); + return 0; } @@ -1936,10 +1938,10 @@ static void i40e_restore_vlan(struct i40e_vsi *vsi) * @vsi: the vsi being adjusted * @vid: the vlan id to set as a PVID **/ -i40e_status i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid) +int i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid) { struct i40e_vsi_context ctxt; - i40e_status ret; + i40e_status aq_ret; vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID); vsi->info.pvid = cpu_to_le16(vid); @@ -1948,14 +1950,15 @@ i40e_status i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid) ctxt.seid = vsi->seid; memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info)); - ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL); - if (ret) { + aq_ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL); + if (aq_ret) { dev_info(&vsi->back->pdev->dev, "%s: update vsi failed, aq_err=%d\n", __func__, vsi->back->hw.aq.asq_last_status); + return -ENOENT; } - return ret; + return 0; } /** @@ -3326,7 +3329,8 @@ static void i40e_pf_unquiesce_all_vsi(struct i40e_pf *pf) **/ static u8 i40e_dcb_get_num_tc(struct i40e_dcbx_config *dcbcfg) { - int num_tc = 0, i; + u8 num_tc = 0; + int i; /* Scan the ETS Config Priority Table to find * traffic class enabled for a given priority @@ -3341,9 +3345,7 @@ static u8 i40e_dcb_get_num_tc(struct i40e_dcbx_config *dcbcfg) /* Traffic class index starts from zero so * increment to return the actual count */ - num_tc++; - - return num_tc; + return num_tc + 1; } /** @@ -3451,28 +3453,27 @@ static int i40e_vsi_get_bw_info(struct i40e_vsi *vsi) struct i40e_aqc_query_vsi_bw_config_resp bw_config = {0}; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; + i40e_status aq_ret; u32 tc_bw_max; - int ret; int i; /* Get the VSI level BW configuration */ - ret = i40e_aq_query_vsi_bw_config(hw, vsi->seid, &bw_config, NULL); - if (ret) { + aq_ret = i40e_aq_query_vsi_bw_config(hw, vsi->seid, &bw_config, NULL); + if (aq_ret) { dev_info(&pf->pdev->dev, "couldn't get pf vsi bw config, err %d, aq_err %d\n", - ret, pf->hw.aq.asq_last_status); - return ret; + aq_ret, pf->hw.aq.asq_last_status); + return -EINVAL; } /* Get the VSI level BW configuration per TC */ - ret = i40e_aq_query_vsi_ets_sla_config(hw, vsi->seid, - &bw_ets_config, - NULL); - if (ret) { + aq_ret = i40e_aq_query_vsi_ets_sla_config(hw, vsi->seid, &bw_ets_config, + NULL); + if (aq_ret) { dev_info(&pf->pdev->dev, "couldn't get pf vsi ets bw config, err %d, aq_err %d\n", - ret, pf->hw.aq.asq_last_status); - return ret; + aq_ret, pf->hw.aq.asq_last_status); + return -EINVAL; } if (bw_config.tc_valid_bits != bw_ets_config.tc_valid_bits) { @@ -3494,7 +3495,8 @@ static int i40e_vsi_get_bw_info(struct i40e_vsi *vsi) /* 3 bits out of 4 for each TC */ vsi->bw_ets_max_quanta[i] = (u8)((tc_bw_max >> (i*4)) & 0x7); } - return ret; + + return 0; } /** @@ -3505,30 +3507,30 @@ static int i40e_vsi_get_bw_info(struct i40e_vsi *vsi) * * Returns 0 on success, negative value on failure **/ -static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, - u8 enabled_tc, +static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, u8 *bw_share) { struct i40e_aqc_configure_vsi_tc_bw_data bw_data; - int i, ret = 0; + i40e_status aq_ret; + int i; bw_data.tc_valid_bits = enabled_tc; for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) bw_data.tc_bw_credits[i] = bw_share[i]; - ret = i40e_aq_config_vsi_tc_bw(&vsi->back->hw, vsi->seid, - &bw_data, NULL); - if (ret) { + aq_ret = i40e_aq_config_vsi_tc_bw(&vsi->back->hw, vsi->seid, &bw_data, + NULL); + if (aq_ret) { dev_info(&vsi->back->pdev->dev, "%s: AQ command Config VSI BW allocation per TC failed = %d\n", __func__, vsi->back->hw.aq.asq_last_status); - return ret; + return -EINVAL; } for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) vsi->info.qs_handle[i] = bw_data.qs_handles[i]; - return ret; + return 0; } /** diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 48cbc833b051..86d51429a189 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -1607,6 +1607,9 @@ static int igb_integrated_phy_loopback(struct igb_adapter *adapter) igb_write_phy_reg(hw, I347AT4_PAGE_SELECT, 0); igb_write_phy_reg(hw, PHY_CONTROL, 0x4140); } + } else if (hw->phy.type == e1000_phy_82580) { + /* enable MII loopback */ + igb_write_phy_reg(hw, I82580_PHY_LBK_CTRL, 0x8041); } /* add small delay to avoid loopback test failure */ diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 1a9c4f6269ea..ecc7f7b696b8 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -3086,13 +3086,16 @@ static struct sk_buff *skge_rx_get(struct net_device *dev, PCI_DMA_FROMDEVICE); skge_rx_reuse(e, skge->rx_buf_size); } else { + struct skge_element ee; struct sk_buff *nskb; nskb = netdev_alloc_skb_ip_align(dev, skge->rx_buf_size); if (!nskb) goto resubmit; - skb = e->skb; + ee = *e; + + skb = ee.skb; prefetch(skb->data); if (skge_rx_setup(skge, e, nskb, skge->rx_buf_size) < 0) { @@ -3101,8 +3104,8 @@ static struct sk_buff *skge_rx_get(struct net_device *dev, } pci_unmap_single(skge->hw->pdev, - dma_unmap_addr(e, mapaddr), - dma_unmap_len(e, maplen), + dma_unmap_addr(&ee, mapaddr), + dma_unmap_len(&ee, maplen), PCI_DMA_FROMDEVICE); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 5472cbd34028..6ca30739625f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -180,28 +180,32 @@ static int verify_block_sig(struct mlx5_cmd_prot_block *block) return 0; } -static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token) +static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token, + int csum) { block->token = token; - block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 2); - block->sig = ~xor8_buf(block, sizeof(*block) - 1); + if (csum) { + block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) - + sizeof(block->data) - 2); + block->sig = ~xor8_buf(block, sizeof(*block) - 1); + } } -static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token) +static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token, int csum) { struct mlx5_cmd_mailbox *next = msg->next; while (next) { - calc_block_sig(next->buf, token); + calc_block_sig(next->buf, token, csum); next = next->next; } } -static void set_signature(struct mlx5_cmd_work_ent *ent) +static void set_signature(struct mlx5_cmd_work_ent *ent, int csum) { ent->lay->sig = ~xor8_buf(ent->lay, sizeof(*ent->lay)); - calc_chain_sig(ent->in, ent->token); - calc_chain_sig(ent->out, ent->token); + calc_chain_sig(ent->in, ent->token, csum); + calc_chain_sig(ent->out, ent->token, csum); } static void poll_timeout(struct mlx5_cmd_work_ent *ent) @@ -539,8 +543,7 @@ static void cmd_work_handler(struct work_struct *work) lay->type = MLX5_PCI_CMD_XPORT; lay->token = ent->token; lay->status_own = CMD_OWNER_HW; - if (!cmd->checksum_disabled) - set_signature(ent); + set_signature(ent, !cmd->checksum_disabled); dump_command(dev, ent, 1); ktime_get_ts(&ent->ts1); @@ -773,8 +776,6 @@ static int mlx5_copy_from_msg(void *to, struct mlx5_cmd_msg *from, int size) copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE); block = next->buf; - if (xor8_buf(block, sizeof(*block)) != 0xff) - return -EINVAL; memcpy(to, block->data, copy); to += copy; @@ -1361,6 +1362,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) goto err_map; } + cmd->checksum_disabled = 1; cmd->max_reg_cmds = (1 << cmd->log_sz) - 1; cmd->bitmask = (1 << cmd->max_reg_cmds) - 1; @@ -1510,7 +1512,7 @@ int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr) case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: return -EIO; case MLX5_CMD_STAT_BAD_RES_ERR: return -EINVAL; case MLX5_CMD_STAT_RES_BUSY: return -EBUSY; - case MLX5_CMD_STAT_LIM_ERR: return -EINVAL; + case MLX5_CMD_STAT_LIM_ERR: return -ENOMEM; case MLX5_CMD_STAT_BAD_RES_STATE_ERR: return -EINVAL; case MLX5_CMD_STAT_IX_ERR: return -EINVAL; case MLX5_CMD_STAT_NO_RES_ERR: return -EAGAIN; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 443cc4d7b024..2231d93cc7ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -366,9 +366,11 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, goto err_in; } + snprintf(eq->name, MLX5_MAX_EQ_NAME, "%s@pci:%s", + name, pci_name(dev->pdev)); eq->eqn = out.eq_number; err = request_irq(table->msix_arr[vecidx].vector, mlx5_msix_handler, 0, - name, eq); + eq->name, eq); if (err) goto err_eq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index b47739b0b5f6..bc0f5fb66e24 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -165,9 +165,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) struct mlx5_cmd_set_hca_cap_mbox_in *set_ctx = NULL; struct mlx5_cmd_query_hca_cap_mbox_in query_ctx; struct mlx5_cmd_set_hca_cap_mbox_out set_out; - struct mlx5_profile *prof = dev->profile; u64 flags; - int csum = 1; int err; memset(&query_ctx, 0, sizeof(query_ctx)); @@ -197,20 +195,14 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) memcpy(&set_ctx->hca_cap, &query_out->hca_cap, sizeof(set_ctx->hca_cap)); - if (prof->mask & MLX5_PROF_MASK_CMDIF_CSUM) { - csum = !!prof->cmdif_csum; - flags = be64_to_cpu(set_ctx->hca_cap.flags); - if (csum) - flags |= MLX5_DEV_CAP_FLAG_CMDIF_CSUM; - else - flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM; - - set_ctx->hca_cap.flags = cpu_to_be64(flags); - } - if (dev->profile->mask & MLX5_PROF_MASK_QP_SIZE) set_ctx->hca_cap.log_max_qp = dev->profile->log_max_qp; + flags = be64_to_cpu(query_out->hca_cap.flags); + /* disable checksum */ + flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM; + + set_ctx->hca_cap.flags = cpu_to_be64(flags); memset(&set_out, 0, sizeof(set_out)); set_ctx->hca_cap.log_uar_page_sz = cpu_to_be16(PAGE_SHIFT - 12); set_ctx->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_SET_HCA_CAP); @@ -225,9 +217,6 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) if (err) goto query_ex; - if (!csum) - dev->cmd.checksum_disabled = 1; - query_ex: kfree(query_out); kfree(set_ctx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 3a2408d44820..7b12acf210f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -90,6 +90,10 @@ struct mlx5_manage_pages_outbox { __be64 pas[0]; }; +enum { + MAX_RECLAIM_TIME_MSECS = 5000, +}; + static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id) { struct rb_root *root = &dev->priv.page_root; @@ -279,6 +283,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, int err; int i; + if (nclaimed) + *nclaimed = 0; + memset(&in, 0, sizeof(in)); outlen = sizeof(*out) + npages * sizeof(out->pas[0]); out = mlx5_vzalloc(outlen); @@ -388,20 +395,25 @@ static int optimal_reclaimed_pages(void) int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) { - unsigned long end = jiffies + msecs_to_jiffies(5000); + unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS); struct fw_page *fwp; struct rb_node *p; + int nclaimed = 0; int err; do { p = rb_first(&dev->priv.page_root); if (p) { fwp = rb_entry(p, struct fw_page, rb_node); - err = reclaim_pages(dev, fwp->func_id, optimal_reclaimed_pages(), NULL); + err = reclaim_pages(dev, fwp->func_id, + optimal_reclaimed_pages(), + &nclaimed); if (err) { mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", err); return err; } + if (nclaimed) + end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS); } if (time_after(jiffies, end)) { mlx5_core_warn(dev, "FW did not return all pages. giving up...\n"); diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c index 83c2091c9c23..bd1a2d2bc2ae 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.c +++ b/drivers/net/ethernet/moxa/moxart_ether.c @@ -543,7 +543,7 @@ static const struct of_device_id moxart_mac_match[] = { { } }; -struct __initdata platform_driver moxart_mac_driver = { +static struct platform_driver moxart_mac_driver = { .probe = moxart_mac_probe, .remove = moxart_remove, .driver = { diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c index 4d7ad0074d1c..ebe4c86e5230 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c @@ -1794,3 +1794,11 @@ const struct ethtool_ops qlcnic_sriov_vf_ethtool_ops = { .set_msglevel = qlcnic_set_msglevel, .get_msglevel = qlcnic_get_msglevel, }; + +const struct ethtool_ops qlcnic_ethtool_failed_ops = { + .get_settings = qlcnic_get_settings, + .get_drvinfo = qlcnic_get_drvinfo, + .set_msglevel = qlcnic_set_msglevel, + .get_msglevel = qlcnic_get_msglevel, + .set_dump = qlcnic_set_dump, +}; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index c4c5023e1fdf..21d00a0449a1 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -431,6 +431,9 @@ static void qlcnic_82xx_cancel_idc_work(struct qlcnic_adapter *adapter) while (test_and_set_bit(__QLCNIC_RESETTING, &adapter->state)) usleep_range(10000, 11000); + if (!adapter->fw_work.work.func) + return; + cancel_delayed_work_sync(&adapter->fw_work); } @@ -2275,8 +2278,9 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->portnum = adapter->ahw->pci_func; err = qlcnic_start_firmware(adapter); if (err) { - dev_err(&pdev->dev, "Loading fw failed.Please Reboot\n"); - goto err_out_free_hw; + dev_err(&pdev->dev, "Loading fw failed.Please Reboot\n" + "\t\tIf reboot doesn't help, try flashing the card\n"); + goto err_out_maintenance_mode; } qlcnic_get_multiq_capability(adapter); @@ -2408,6 +2412,22 @@ err_out_disable_pdev: pci_set_drvdata(pdev, NULL); pci_disable_device(pdev); return err; + +err_out_maintenance_mode: + netdev->netdev_ops = &qlcnic_netdev_failed_ops; + SET_ETHTOOL_OPS(netdev, &qlcnic_ethtool_failed_ops); + err = register_netdev(netdev); + + if (err) { + dev_err(&pdev->dev, "Failed to register net device\n"); + qlcnic_clr_all_drv_state(adapter, 0); + goto err_out_free_hw; + } + + pci_set_drvdata(pdev, adapter); + qlcnic_add_sysfs(adapter); + + return 0; } static void qlcnic_remove(struct pci_dev *pdev) @@ -2518,8 +2538,16 @@ static int qlcnic_resume(struct pci_dev *pdev) static int qlcnic_open(struct net_device *netdev) { struct qlcnic_adapter *adapter = netdev_priv(netdev); + u32 state; int err; + state = QLC_SHARED_REG_RD32(adapter, QLCNIC_CRB_DEV_STATE); + if (state == QLCNIC_DEV_FAILED || state == QLCNIC_DEV_BADBAD) { + netdev_err(netdev, "%s: Device is in FAILED state\n", __func__); + + return -EIO; + } + netif_carrier_off(netdev); err = qlcnic_attach(adapter); @@ -3228,6 +3256,13 @@ void qlcnic_82xx_dev_request_reset(struct qlcnic_adapter *adapter, u32 key) return; state = QLC_SHARED_REG_RD32(adapter, QLCNIC_CRB_DEV_STATE); + if (state == QLCNIC_DEV_FAILED || state == QLCNIC_DEV_BADBAD) { + netdev_err(adapter->netdev, "%s: Device is in FAILED state\n", + __func__); + qlcnic_api_unlock(adapter); + + return; + } if (state == QLCNIC_DEV_READY) { QLC_SHARED_REG_WR32(adapter, QLCNIC_CRB_DEV_STATE, diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c index 330d9a8774ad..686f460b1502 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c @@ -397,6 +397,7 @@ static int qlcnic_pci_sriov_disable(struct qlcnic_adapter *adapter) { struct net_device *netdev = adapter->netdev; + rtnl_lock(); if (netif_running(netdev)) __qlcnic_down(adapter, netdev); @@ -407,12 +408,15 @@ static int qlcnic_pci_sriov_disable(struct qlcnic_adapter *adapter) /* After disabling SRIOV re-init the driver in default mode configure opmode based on op_mode of function */ - if (qlcnic_83xx_configure_opmode(adapter)) + if (qlcnic_83xx_configure_opmode(adapter)) { + rtnl_unlock(); return -EIO; + } if (netif_running(netdev)) __qlcnic_up(adapter, netdev); + rtnl_unlock(); return 0; } @@ -533,6 +537,7 @@ static int qlcnic_pci_sriov_enable(struct qlcnic_adapter *adapter, int num_vfs) return -EIO; } + rtnl_lock(); if (netif_running(netdev)) __qlcnic_down(adapter, netdev); @@ -555,6 +560,7 @@ static int qlcnic_pci_sriov_enable(struct qlcnic_adapter *adapter, int num_vfs) __qlcnic_up(adapter, netdev); error: + rtnl_unlock(); return err; } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c index c6165d05cc13..019f4377307f 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c @@ -1272,6 +1272,7 @@ void qlcnic_remove_sysfs_entries(struct qlcnic_adapter *adapter) void qlcnic_create_diag_entries(struct qlcnic_adapter *adapter) { struct device *dev = &adapter->pdev->dev; + u32 state; if (device_create_bin_file(dev, &bin_attr_port_stats)) dev_info(dev, "failed to create port stats sysfs entry"); @@ -1285,8 +1286,13 @@ void qlcnic_create_diag_entries(struct qlcnic_adapter *adapter) if (device_create_bin_file(dev, &bin_attr_mem)) dev_info(dev, "failed to create mem sysfs entry\n"); + state = QLC_SHARED_REG_RD32(adapter, QLCNIC_CRB_DEV_STATE); + if (state == QLCNIC_DEV_FAILED || state == QLCNIC_DEV_BADBAD) + return; + if (device_create_bin_file(dev, &bin_attr_pci_config)) dev_info(dev, "failed to create pci config sysfs entry"); + if (device_create_file(dev, &dev_attr_beacon)) dev_info(dev, "failed to create beacon sysfs entry"); @@ -1307,6 +1313,7 @@ void qlcnic_create_diag_entries(struct qlcnic_adapter *adapter) void qlcnic_remove_diag_entries(struct qlcnic_adapter *adapter) { struct device *dev = &adapter->pdev->dev; + u32 state; device_remove_bin_file(dev, &bin_attr_port_stats); @@ -1315,6 +1322,11 @@ void qlcnic_remove_diag_entries(struct qlcnic_adapter *adapter) device_remove_file(dev, &dev_attr_diag_mode); device_remove_bin_file(dev, &bin_attr_crb); device_remove_bin_file(dev, &bin_attr_mem); + + state = QLC_SHARED_REG_RD32(adapter, QLCNIC_CRB_DEV_STATE); + if (state == QLCNIC_DEV_FAILED || state == QLCNIC_DEV_BADBAD) + return; + device_remove_bin_file(dev, &bin_attr_pci_config); device_remove_file(dev, &dev_attr_beacon); if (!(adapter->flags & QLCNIC_ESWITCH_ENABLED)) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c index 10093f0c4c0f..6bc5db703920 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c @@ -740,8 +740,8 @@ int ql_core_dump(struct ql_adapter *qdev, struct ql_mpi_coredump *mpi_coredump) int i; if (!mpi_coredump) { - netif_err(qdev, drv, qdev->ndev, "No memory available\n"); - return -ENOMEM; + netif_err(qdev, drv, qdev->ndev, "No memory allocated\n"); + return -EINVAL; } /* Try to get the spinlock, but dont worry if diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_mpi.c b/drivers/net/ethernet/qlogic/qlge/qlge_mpi.c index ff2bf8a4e247..7ad146080c36 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_mpi.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_mpi.c @@ -1274,7 +1274,7 @@ void ql_mpi_reset_work(struct work_struct *work) return; } - if (!ql_core_dump(qdev, qdev->mpi_coredump)) { + if (qdev->mpi_coredump && !ql_core_dump(qdev, qdev->mpi_coredump)) { netif_err(qdev, drv, qdev->ndev, "Core is dumped!\n"); qdev->core_is_dumped = 1; queue_delayed_work(qdev->workqueue, diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index 128d7cdf9eb2..c082562dbf4e 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -27,10 +27,10 @@ /* A reboot/assertion causes the MCDI status word to be set after the * command word is set or a REBOOT event is sent. If we notice a reboot - * via these mechanisms then wait 20ms for the status word to be set. + * via these mechanisms then wait 250ms for the status word to be set. */ #define MCDI_STATUS_DELAY_US 100 -#define MCDI_STATUS_DELAY_COUNT 200 +#define MCDI_STATUS_DELAY_COUNT 2500 #define MCDI_STATUS_SLEEP_MS \ (MCDI_STATUS_DELAY_US * MCDI_STATUS_DELAY_COUNT / 1000) @@ -800,9 +800,6 @@ static void efx_mcdi_ev_death(struct efx_nic *efx, int rc) } else { int count; - /* Nobody was waiting for an MCDI request, so trigger a reset */ - efx_schedule_reset(efx, RESET_TYPE_MC_FAILURE); - /* Consume the status word since efx_mcdi_rpc_finish() won't */ for (count = 0; count < MCDI_STATUS_DELAY_COUNT; ++count) { if (efx_mcdi_poll_reboot(efx)) @@ -810,6 +807,9 @@ static void efx_mcdi_ev_death(struct efx_nic *efx, int rc) udelay(MCDI_STATUS_DELAY_US); } mcdi->new_epoch = true; + + /* Nobody was waiting for an MCDI request, so trigger a reset */ + efx_schedule_reset(efx, RESET_TYPE_MC_FAILURE); } spin_unlock(&mcdi->iface_lock); diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index c8f088ab5fdf..bdf697b184ae 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -32,7 +32,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define DRV_NAME "via-rhine" -#define DRV_VERSION "1.5.0" +#define DRV_VERSION "1.5.1" #define DRV_RELDATE "2010-10-09" #include <linux/types.h> @@ -1704,7 +1704,12 @@ static netdev_tx_t rhine_start_tx(struct sk_buff *skb, cpu_to_le32(TXDESC | (skb->len >= ETH_ZLEN ? skb->len : ETH_ZLEN)); if (unlikely(vlan_tx_tag_present(skb))) { - rp->tx_ring[entry].tx_status = cpu_to_le32((vlan_tx_tag_get(skb)) << 16); + u16 vid_pcp = vlan_tx_tag_get(skb); + + /* drop CFI/DEI bit, register needs VID and PCP */ + vid_pcp = (vid_pcp & VLAN_VID_MASK) | + ((vid_pcp & VLAN_PRIO_MASK) >> 1); + rp->tx_ring[entry].tx_status = cpu_to_le32((vid_pcp) << 16); /* request tagging */ rp->tx_ring[entry].desc_length |= cpu_to_le32(0x020000); } diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index b88121f240ca..0029148077a9 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -297,6 +297,12 @@ static int temac_dma_bd_init(struct net_device *ndev) lp->rx_bd_p + (sizeof(*lp->rx_bd_v) * (RX_BD_NUM - 1))); lp->dma_out(lp, TX_CURDESC_PTR, lp->tx_bd_p); + /* Init descriptor indexes */ + lp->tx_bd_ci = 0; + lp->tx_bd_next = 0; + lp->tx_bd_tail = 0; + lp->rx_bd_ci = 0; + return 0; out: diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index a34d6bf5e43b..cc70ecfc7062 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -429,11 +429,13 @@ static void slip_write_wakeup(struct tty_struct *tty) if (!sl || sl->magic != SLIP_MAGIC || !netif_running(sl->dev)) return; + spin_lock(&sl->lock); if (sl->xleft <= 0) { /* Now serial buffer is almost free & we can start * transmission of another packet */ sl->dev->stats.tx_packets++; clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); + spin_unlock(&sl->lock); sl_unlock(sl); return; } @@ -441,6 +443,7 @@ static void slip_write_wakeup(struct tty_struct *tty) actual = tty->ops->write(tty, sl->xhead, sl->xleft); sl->xleft -= actual; sl->xhead += actual; + spin_unlock(&sl->lock); } static void sl_tx_timeout(struct net_device *dev) diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index 2dbb9460349d..c6867f926cff 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -303,7 +303,7 @@ static void dm9601_set_multicast(struct net_device *net) rx_ctl |= 0x02; } else if (net->flags & IFF_ALLMULTI || netdev_mc_count(net) > DM_MAX_MCAST) { - rx_ctl |= 0x04; + rx_ctl |= 0x08; } else if (!netdev_mc_empty(net)) { struct netdev_hw_addr *ha; diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 6312332afeba..3d6aaf79d8b2 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -714,7 +714,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ - {QMI_FIXED_INTF(0x1e2d, 0x12d1, 4)}, /* Cinterion PLxx */ + {QMI_FIXED_INTF(0x1e2d, 0x0060, 4)}, /* Cinterion PLxx */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 7b331e613e02..bf94e10a37c8 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1241,7 +1241,9 @@ static int build_dma_sg(const struct sk_buff *skb, struct urb *urb) if (num_sgs == 1) return 0; - urb->sg = kmalloc(num_sgs * sizeof(struct scatterlist), GFP_ATOMIC); + /* reserve one for zero packet */ + urb->sg = kmalloc((num_sgs + 1) * sizeof(struct scatterlist), + GFP_ATOMIC); if (!urb->sg) return -ENOMEM; @@ -1305,7 +1307,7 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb, if (build_dma_sg(skb, urb) < 0) goto drop; } - entry->length = length = urb->transfer_buffer_length; + length = urb->transfer_buffer_length; /* don't assume the hardware handles USB_ZERO_PACKET * NOTE: strictly conforming cdc-ether devices should expect @@ -1317,15 +1319,18 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb, if (length % dev->maxpacket == 0) { if (!(info->flags & FLAG_SEND_ZLP)) { if (!(info->flags & FLAG_MULTI_PACKET)) { - urb->transfer_buffer_length++; - if (skb_tailroom(skb)) { + length++; + if (skb_tailroom(skb) && !urb->num_sgs) { skb->data[skb->len] = 0; __skb_put(skb, 1); - } + } else if (urb->num_sgs) + sg_set_buf(&urb->sg[urb->num_sgs++], + dev->padding_pkt, 1); } } else urb->transfer_flags |= URB_ZERO_PACKET; } + entry->length = urb->transfer_buffer_length = length; spin_lock_irqsave(&dev->txq.lock, flags); retval = usb_autopm_get_interface_async(dev->intf); @@ -1509,6 +1514,7 @@ void usbnet_disconnect (struct usb_interface *intf) usb_kill_urb(dev->interrupt); usb_free_urb(dev->interrupt); + kfree(dev->padding_pkt); free_netdev(net); } @@ -1679,9 +1685,16 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) /* initialize max rx_qlen and tx_qlen */ usbnet_update_max_qlen(dev); + if (dev->can_dma_sg && !(info->flags & FLAG_SEND_ZLP) && + !(info->flags & FLAG_MULTI_PACKET)) { + dev->padding_pkt = kzalloc(1, GFP_KERNEL); + if (!dev->padding_pkt) + goto out4; + } + status = register_netdev (net); if (status) - goto out4; + goto out5; netif_info(dev, probe, dev->net, "register '%s' at usb-%s-%s, %s, %pM\n", udev->dev.driver->name, @@ -1699,6 +1712,8 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) return 0; +out5: + kfree(dev->padding_pkt); out4: usb_free_urb(dev->interrupt); out3: diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index d1292fe746bc..2ef5b6219f3f 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -952,8 +952,7 @@ void vxlan_sock_release(struct vxlan_sock *vs) spin_lock(&vn->sock_lock); hlist_del_rcu(&vs->hlist); - smp_wmb(); - vs->sock->sk->sk_user_data = NULL; + rcu_assign_sk_user_data(vs->sock->sk, NULL); vxlan_notify_del_rx_port(sk); spin_unlock(&vn->sock_lock); @@ -1048,8 +1047,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) port = inet_sk(sk)->inet_sport; - smp_read_barrier_depends(); - vs = (struct vxlan_sock *)sk->sk_user_data; + vs = rcu_dereference_sk_user_data(sk); if (!vs) goto drop; @@ -2302,8 +2300,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, atomic_set(&vs->refcnt, 1); vs->rcv = rcv; vs->data = data; - smp_wmb(); - vs->sock->sk->sk_user_data = vs; + rcu_assign_sk_user_data(vs->sock->sk, vs); spin_lock(&vn->sock_lock); hlist_add_head_rcu(&vs->hlist, vs_head(net, port)); diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c index 4ee472a5a4e4..ab9e3a8410bc 100644 --- a/drivers/net/wireless/ath/ath9k/recv.c +++ b/drivers/net/wireless/ath/ath9k/recv.c @@ -1270,13 +1270,6 @@ static void ath9k_antenna_check(struct ath_softc *sc, return; /* - * All MPDUs in an aggregate will use the same LNA - * as the first MPDU. - */ - if (rs->rs_isaggr && !rs->rs_firstaggr) - return; - - /* * Change the default rx antenna if rx diversity * chooses the other antenna 3 times in a row. */ diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 35b515fe3ffa..5ac713d2ff5d 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -399,6 +399,7 @@ static struct ath_buf* ath_clone_txbuf(struct ath_softc *sc, struct ath_buf *bf) tbf->bf_buf_addr = bf->bf_buf_addr; memcpy(tbf->bf_desc, bf->bf_desc, sc->sc_ah->caps.tx_desc_len); tbf->bf_state = bf->bf_state; + tbf->bf_state.stale = false; return tbf; } @@ -1389,11 +1390,15 @@ int ath_tx_aggr_start(struct ath_softc *sc, struct ieee80211_sta *sta, u16 tid, u16 *ssn) { struct ath_atx_tid *txtid; + struct ath_txq *txq; struct ath_node *an; u8 density; an = (struct ath_node *)sta->drv_priv; txtid = ATH_AN_2_TID(an, tid); + txq = txtid->ac->txq; + + ath_txq_lock(sc, txq); /* update ampdu factor/density, they may have changed. This may happen * in HT IBSS when a beacon with HT-info is received after the station @@ -1417,6 +1422,8 @@ int ath_tx_aggr_start(struct ath_softc *sc, struct ieee80211_sta *sta, memset(txtid->tx_buf, 0, sizeof(txtid->tx_buf)); txtid->baw_head = txtid->baw_tail = 0; + ath_txq_unlock_complete(sc, txq); + return 0; } @@ -1555,8 +1562,10 @@ void ath9k_release_buffered_frames(struct ieee80211_hw *hw, __skb_unlink(bf->bf_mpdu, tid_q); list_add_tail(&bf->list, &bf_q); ath_set_rates(tid->an->vif, tid->an->sta, bf); - ath_tx_addto_baw(sc, tid, bf); - bf->bf_state.bf_type &= ~BUF_AGGR; + if (bf_isampdu(bf)) { + ath_tx_addto_baw(sc, tid, bf); + bf->bf_state.bf_type &= ~BUF_AGGR; + } if (bf_tail) bf_tail->bf_next = bf; @@ -1950,7 +1959,9 @@ static void ath_tx_txqaddbuf(struct ath_softc *sc, struct ath_txq *txq, if (bf_is_ampdu_not_probing(bf)) txq->axq_ampdu_depth++; - bf = bf->bf_lastbf->bf_next; + bf_last = bf->bf_lastbf; + bf = bf_last->bf_next; + bf_last->bf_next = NULL; } } } diff --git a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh_sdmmc.c b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh_sdmmc.c index 64f4a2bc8dde..c3462b75bd08 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh_sdmmc.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh_sdmmc.c @@ -464,8 +464,6 @@ static struct sdio_driver brcmf_sdmmc_driver = { static int brcmf_sdio_pd_probe(struct platform_device *pdev) { - int ret; - brcmf_dbg(SDIO, "Enter\n"); brcmfmac_sdio_pdata = pdev->dev.platform_data; @@ -473,11 +471,7 @@ static int brcmf_sdio_pd_probe(struct platform_device *pdev) if (brcmfmac_sdio_pdata->power_on) brcmfmac_sdio_pdata->power_on(); - ret = sdio_register_driver(&brcmf_sdmmc_driver); - if (ret) - brcmf_err("sdio_register_driver failed: %d\n", ret); - - return ret; + return 0; } static int brcmf_sdio_pd_remove(struct platform_device *pdev) @@ -500,6 +494,15 @@ static struct platform_driver brcmf_sdio_pd = { } }; +void brcmf_sdio_register(void) +{ + int ret; + + ret = sdio_register_driver(&brcmf_sdmmc_driver); + if (ret) + brcmf_err("sdio_register_driver failed: %d\n", ret); +} + void brcmf_sdio_exit(void) { brcmf_dbg(SDIO, "Enter\n"); @@ -510,18 +513,13 @@ void brcmf_sdio_exit(void) sdio_unregister_driver(&brcmf_sdmmc_driver); } -void brcmf_sdio_init(void) +void __init brcmf_sdio_init(void) { int ret; brcmf_dbg(SDIO, "Enter\n"); ret = platform_driver_probe(&brcmf_sdio_pd, brcmf_sdio_pd_probe); - if (ret == -ENODEV) { - brcmf_dbg(SDIO, "No platform data available, registering without.\n"); - ret = sdio_register_driver(&brcmf_sdmmc_driver); - } - - if (ret) - brcmf_err("driver registration failed: %d\n", ret); + if (ret == -ENODEV) + brcmf_dbg(SDIO, "No platform data available.\n"); } diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_bus.h b/drivers/net/wireless/brcm80211/brcmfmac/dhd_bus.h index f7c1985844e4..74156f84180c 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_bus.h +++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_bus.h @@ -156,10 +156,11 @@ extern int brcmf_bus_start(struct device *dev); #ifdef CONFIG_BRCMFMAC_SDIO extern void brcmf_sdio_exit(void); extern void brcmf_sdio_init(void); +extern void brcmf_sdio_register(void); #endif #ifdef CONFIG_BRCMFMAC_USB extern void brcmf_usb_exit(void); -extern void brcmf_usb_init(void); +extern void brcmf_usb_register(void); #endif #endif /* _BRCMF_BUS_H_ */ diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c b/drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c index e067aec1fbf1..40e7f854e10f 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c @@ -1231,21 +1231,23 @@ u32 brcmf_get_chip_info(struct brcmf_if *ifp) return bus->chip << 4 | bus->chiprev; } -static void brcmf_driver_init(struct work_struct *work) +static void brcmf_driver_register(struct work_struct *work) { - brcmf_debugfs_init(); - #ifdef CONFIG_BRCMFMAC_SDIO - brcmf_sdio_init(); + brcmf_sdio_register(); #endif #ifdef CONFIG_BRCMFMAC_USB - brcmf_usb_init(); + brcmf_usb_register(); #endif } -static DECLARE_WORK(brcmf_driver_work, brcmf_driver_init); +static DECLARE_WORK(brcmf_driver_work, brcmf_driver_register); static int __init brcmfmac_module_init(void) { + brcmf_debugfs_init(); +#ifdef CONFIG_BRCMFMAC_SDIO + brcmf_sdio_init(); +#endif if (!schedule_work(&brcmf_driver_work)) return -EBUSY; diff --git a/drivers/net/wireless/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/brcm80211/brcmfmac/usb.c index 39e01a7c8556..f4aea47e0730 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/usb.c @@ -1539,7 +1539,7 @@ void brcmf_usb_exit(void) brcmf_release_fw(&fw_image_list); } -void brcmf_usb_init(void) +void brcmf_usb_register(void) { brcmf_dbg(USB, "Enter\n"); INIT_LIST_HEAD(&fw_image_list); diff --git a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c index 3a6544710c8a..edc5d105ff98 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c @@ -457,6 +457,8 @@ static int brcms_ops_start(struct ieee80211_hw *hw) if (err != 0) brcms_err(wl->wlc->hw->d11core, "%s: brcms_up() returned %d\n", __func__, err); + + bcma_core_pci_power_save(wl->wlc->hw->d11core->bus, true); return err; } @@ -479,6 +481,8 @@ static void brcms_ops_stop(struct ieee80211_hw *hw) return; } + bcma_core_pci_power_save(wl->wlc->hw->d11core->bus, false); + /* put driver in down state */ spin_lock_bh(&wl->lock); brcms_down(wl); diff --git a/drivers/net/wireless/cw1200/cw1200_spi.c b/drivers/net/wireless/cw1200/cw1200_spi.c index f5e6b489ed32..899cad34ccd3 100644 --- a/drivers/net/wireless/cw1200/cw1200_spi.c +++ b/drivers/net/wireless/cw1200/cw1200_spi.c @@ -42,7 +42,6 @@ struct hwbus_priv { spinlock_t lock; /* Serialize all bus operations */ wait_queue_head_t wq; int claimed; - int irq_disabled; }; #define SDIO_TO_SPI_ADDR(addr) ((addr & 0x1f)>>2) @@ -238,8 +237,6 @@ static irqreturn_t cw1200_spi_irq_handler(int irq, void *dev_id) struct hwbus_priv *self = dev_id; if (self->core) { - disable_irq_nosync(self->func->irq); - self->irq_disabled = 1; cw1200_irq_handler(self->core); return IRQ_HANDLED; } else { @@ -253,9 +250,10 @@ static int cw1200_spi_irq_subscribe(struct hwbus_priv *self) pr_debug("SW IRQ subscribe\n"); - ret = request_any_context_irq(self->func->irq, cw1200_spi_irq_handler, - IRQF_TRIGGER_HIGH, - "cw1200_wlan_irq", self); + ret = request_threaded_irq(self->func->irq, NULL, + cw1200_spi_irq_handler, + IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + "cw1200_wlan_irq", self); if (WARN_ON(ret < 0)) goto exit; @@ -273,22 +271,13 @@ exit: static int cw1200_spi_irq_unsubscribe(struct hwbus_priv *self) { + int ret = 0; + pr_debug("SW IRQ unsubscribe\n"); disable_irq_wake(self->func->irq); free_irq(self->func->irq, self); - return 0; -} - -static int cw1200_spi_irq_enable(struct hwbus_priv *self, int enable) -{ - /* Disables are handled by the interrupt handler */ - if (enable && self->irq_disabled) { - enable_irq(self->func->irq); - self->irq_disabled = 0; - } - - return 0; + return ret; } static int cw1200_spi_off(const struct cw1200_platform_data_spi *pdata) @@ -368,7 +357,6 @@ static struct hwbus_ops cw1200_spi_hwbus_ops = { .unlock = cw1200_spi_unlock, .align_size = cw1200_spi_align_size, .power_mgmt = cw1200_spi_pm, - .irq_enable = cw1200_spi_irq_enable, }; /* Probe Function to be called by SPI stack when device is discovered */ diff --git a/drivers/net/wireless/cw1200/fwio.c b/drivers/net/wireless/cw1200/fwio.c index 0b2061bbc68b..acdff0f7f952 100644 --- a/drivers/net/wireless/cw1200/fwio.c +++ b/drivers/net/wireless/cw1200/fwio.c @@ -485,7 +485,7 @@ int cw1200_load_firmware(struct cw1200_common *priv) /* Enable interrupt signalling */ priv->hwbus_ops->lock(priv->hwbus_priv); - ret = __cw1200_irq_enable(priv, 2); + ret = __cw1200_irq_enable(priv, 1); priv->hwbus_ops->unlock(priv->hwbus_priv); if (ret < 0) goto unsubscribe; diff --git a/drivers/net/wireless/cw1200/hwbus.h b/drivers/net/wireless/cw1200/hwbus.h index 51dfb3a90735..8b2fc831c3de 100644 --- a/drivers/net/wireless/cw1200/hwbus.h +++ b/drivers/net/wireless/cw1200/hwbus.h @@ -28,7 +28,6 @@ struct hwbus_ops { void (*unlock)(struct hwbus_priv *self); size_t (*align_size)(struct hwbus_priv *self, size_t size); int (*power_mgmt)(struct hwbus_priv *self, bool suspend); - int (*irq_enable)(struct hwbus_priv *self, int enable); }; #endif /* CW1200_HWBUS_H */ diff --git a/drivers/net/wireless/cw1200/hwio.c b/drivers/net/wireless/cw1200/hwio.c index 41bd7615ccaa..ff230b7aeedd 100644 --- a/drivers/net/wireless/cw1200/hwio.c +++ b/drivers/net/wireless/cw1200/hwio.c @@ -273,21 +273,6 @@ int __cw1200_irq_enable(struct cw1200_common *priv, int enable) u16 val16; int ret; - /* We need to do this hack because the SPI layer can sleep on I/O - and the general path involves I/O to the device in interrupt - context. - - However, the initial enable call needs to go to the hardware. - - We don't worry about shutdown because we do a full reset which - clears the interrupt enabled bits. - */ - if (priv->hwbus_ops->irq_enable) { - ret = priv->hwbus_ops->irq_enable(priv->hwbus_priv, enable); - if (ret || enable < 2) - return ret; - } - if (HIF_8601_SILICON == priv->hw_type) { ret = __cw1200_reg_read_32(priv, ST90TDS_CONFIG_REG_ID, &val32); if (ret < 0) { diff --git a/drivers/net/wireless/mwifiex/11n_aggr.c b/drivers/net/wireless/mwifiex/11n_aggr.c index 21c688264708..1214c587fd08 100644 --- a/drivers/net/wireless/mwifiex/11n_aggr.c +++ b/drivers/net/wireless/mwifiex/11n_aggr.c @@ -150,7 +150,7 @@ mwifiex_11n_form_amsdu_txpd(struct mwifiex_private *priv, */ int mwifiex_11n_aggregate_pkt(struct mwifiex_private *priv, - struct mwifiex_ra_list_tbl *pra_list, int headroom, + struct mwifiex_ra_list_tbl *pra_list, int ptrindex, unsigned long ra_list_flags) __releases(&priv->wmm.ra_list_spinlock) { @@ -160,6 +160,7 @@ mwifiex_11n_aggregate_pkt(struct mwifiex_private *priv, int pad = 0, ret; struct mwifiex_tx_param tx_param; struct txpd *ptx_pd = NULL; + int headroom = adapter->iface_type == MWIFIEX_USB ? 0 : INTF_HEADER_LEN; skb_src = skb_peek(&pra_list->skb_head); if (!skb_src) { diff --git a/drivers/net/wireless/mwifiex/11n_aggr.h b/drivers/net/wireless/mwifiex/11n_aggr.h index 900e1c62a0cc..892098d6a696 100644 --- a/drivers/net/wireless/mwifiex/11n_aggr.h +++ b/drivers/net/wireless/mwifiex/11n_aggr.h @@ -26,7 +26,7 @@ int mwifiex_11n_deaggregate_pkt(struct mwifiex_private *priv, struct sk_buff *skb); int mwifiex_11n_aggregate_pkt(struct mwifiex_private *priv, - struct mwifiex_ra_list_tbl *ptr, int headroom, + struct mwifiex_ra_list_tbl *ptr, int ptr_index, unsigned long flags) __releases(&priv->wmm.ra_list_spinlock); diff --git a/drivers/net/wireless/mwifiex/cmdevt.c b/drivers/net/wireless/mwifiex/cmdevt.c index 2d761477d15e..a6c46f3b6e3a 100644 --- a/drivers/net/wireless/mwifiex/cmdevt.c +++ b/drivers/net/wireless/mwifiex/cmdevt.c @@ -1155,7 +1155,7 @@ int mwifiex_ret_802_11_hs_cfg(struct mwifiex_private *priv, uint32_t conditions = le32_to_cpu(phs_cfg->params.hs_config.conditions); if (phs_cfg->action == cpu_to_le16(HS_ACTIVATE) && - adapter->iface_type == MWIFIEX_SDIO) { + adapter->iface_type != MWIFIEX_USB) { mwifiex_hs_activated_event(priv, true); return 0; } else { @@ -1167,8 +1167,7 @@ int mwifiex_ret_802_11_hs_cfg(struct mwifiex_private *priv, } if (conditions != HS_CFG_CANCEL) { adapter->is_hs_configured = true; - if (adapter->iface_type == MWIFIEX_USB || - adapter->iface_type == MWIFIEX_PCIE) + if (adapter->iface_type == MWIFIEX_USB) mwifiex_hs_activated_event(priv, true); } else { adapter->is_hs_configured = false; diff --git a/drivers/net/wireless/mwifiex/usb.c b/drivers/net/wireless/mwifiex/usb.c index 2472d4b7f00e..1c70b8d09227 100644 --- a/drivers/net/wireless/mwifiex/usb.c +++ b/drivers/net/wireless/mwifiex/usb.c @@ -447,9 +447,6 @@ static int mwifiex_usb_suspend(struct usb_interface *intf, pm_message_t message) */ adapter->is_suspended = true; - for (i = 0; i < adapter->priv_num; i++) - netif_carrier_off(adapter->priv[i]->netdev); - if (atomic_read(&card->rx_cmd_urb_pending) && card->rx_cmd.urb) usb_kill_urb(card->rx_cmd.urb); @@ -509,10 +506,6 @@ static int mwifiex_usb_resume(struct usb_interface *intf) MWIFIEX_RX_CMD_BUF_SIZE); } - for (i = 0; i < adapter->priv_num; i++) - if (adapter->priv[i]->media_connected) - netif_carrier_on(adapter->priv[i]->netdev); - /* Disable Host Sleep */ if (adapter->hs_activated) mwifiex_cancel_hs(mwifiex_get_priv(adapter, diff --git a/drivers/net/wireless/mwifiex/wmm.c b/drivers/net/wireless/mwifiex/wmm.c index 2e8f9cdea54d..95fa3599b407 100644 --- a/drivers/net/wireless/mwifiex/wmm.c +++ b/drivers/net/wireless/mwifiex/wmm.c @@ -1239,8 +1239,7 @@ mwifiex_dequeue_tx_packet(struct mwifiex_adapter *adapter) if (enable_tx_amsdu && mwifiex_is_amsdu_allowed(priv, tid) && mwifiex_is_11n_aggragation_possible(priv, ptr, adapter->tx_buf_size)) - mwifiex_11n_aggregate_pkt(priv, ptr, INTF_HEADER_LEN, - ptr_index, flags); + mwifiex_11n_aggregate_pkt(priv, ptr, ptr_index, flags); /* ra_list_spinlock has been freed in mwifiex_11n_aggregate_pkt() */ else diff --git a/drivers/net/wireless/p54/p54usb.c b/drivers/net/wireless/p54/p54usb.c index b9deef66cf4b..e328d3058c41 100644 --- a/drivers/net/wireless/p54/p54usb.c +++ b/drivers/net/wireless/p54/p54usb.c @@ -83,6 +83,7 @@ static struct usb_device_id p54u_table[] = { {USB_DEVICE(0x06a9, 0x000e)}, /* Westell 802.11g USB (A90-211WG-01) */ {USB_DEVICE(0x06b9, 0x0121)}, /* Thomson SpeedTouch 121g */ {USB_DEVICE(0x0707, 0xee13)}, /* SMC 2862W-G version 2 */ + {USB_DEVICE(0x07aa, 0x0020)}, /* Corega WLUSB2GTST USB */ {USB_DEVICE(0x0803, 0x4310)}, /* Zoom 4410a */ {USB_DEVICE(0x083a, 0x4521)}, /* Siemens Gigaset USB Adapter 54 version 2 */ {USB_DEVICE(0x083a, 0x4531)}, /* T-Com Sinus 154 data II */ @@ -979,6 +980,7 @@ static int p54u_load_firmware(struct ieee80211_hw *dev, if (err) { dev_err(&priv->udev->dev, "(p54usb) cannot load firmware %s " "(%d)!\n", p54u_fwlist[i].fw, err); + usb_put_dev(udev); } return err; diff --git a/drivers/net/wireless/rtlwifi/wifi.h b/drivers/net/wireless/rtlwifi/wifi.h index cc03e7c87cbe..703258742d28 100644 --- a/drivers/net/wireless/rtlwifi/wifi.h +++ b/drivers/net/wireless/rtlwifi/wifi.h @@ -2057,7 +2057,7 @@ struct rtl_priv { that it points to the data allocated beyond this structure like: rtl_pci_priv or rtl_usb_priv */ - u8 priv[0]; + u8 priv[0] __aligned(sizeof(void *)); }; #define rtl_priv(hw) (((struct rtl_priv *)(hw)->priv)) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index a53782ef1540..b45bce20ad76 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -24,6 +24,12 @@ struct backend_info { struct xenbus_device *dev; struct xenvif *vif; + + /* This is the state that will be reflected in xenstore when any + * active hotplug script completes. + */ + enum xenbus_state state; + enum xenbus_state frontend_state; struct xenbus_watch hotplug_status_watch; u8 have_hotplug_status_watch:1; @@ -136,6 +142,8 @@ static int netback_probe(struct xenbus_device *dev, if (err) goto fail; + be->state = XenbusStateInitWait; + /* This kicks hotplug scripts, so do it immediately. */ backend_create_xenvif(be); @@ -208,24 +216,113 @@ static void backend_create_xenvif(struct backend_info *be) kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE); } - -static void disconnect_backend(struct xenbus_device *dev) +static void backend_disconnect(struct backend_info *be) { - struct backend_info *be = dev_get_drvdata(&dev->dev); - if (be->vif) xenvif_disconnect(be->vif); } -static void destroy_backend(struct xenbus_device *dev) +static void backend_connect(struct backend_info *be) { - struct backend_info *be = dev_get_drvdata(&dev->dev); + if (be->vif) + connect(be); +} - if (be->vif) { - kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); - xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status"); - xenvif_free(be->vif); - be->vif = NULL; +static inline void backend_switch_state(struct backend_info *be, + enum xenbus_state state) +{ + struct xenbus_device *dev = be->dev; + + pr_debug("%s -> %s\n", dev->nodename, xenbus_strstate(state)); + be->state = state; + + /* If we are waiting for a hotplug script then defer the + * actual xenbus state change. + */ + if (!be->have_hotplug_status_watch) + xenbus_switch_state(dev, state); +} + +/* Handle backend state transitions: + * + * The backend state starts in InitWait and the following transitions are + * allowed. + * + * InitWait -> Connected + * + * ^ \ | + * | \ | + * | \ | + * | \ | + * | \ | + * | \ | + * | V V + * + * Closed <-> Closing + * + * The state argument specifies the eventual state of the backend and the + * function transitions to that state via the shortest path. + */ +static void set_backend_state(struct backend_info *be, + enum xenbus_state state) +{ + while (be->state != state) { + switch (be->state) { + case XenbusStateClosed: + switch (state) { + case XenbusStateInitWait: + case XenbusStateConnected: + pr_info("%s: prepare for reconnect\n", + be->dev->nodename); + backend_switch_state(be, XenbusStateInitWait); + break; + case XenbusStateClosing: + backend_switch_state(be, XenbusStateClosing); + break; + default: + BUG(); + } + break; + case XenbusStateInitWait: + switch (state) { + case XenbusStateConnected: + backend_connect(be); + backend_switch_state(be, XenbusStateConnected); + break; + case XenbusStateClosing: + case XenbusStateClosed: + backend_switch_state(be, XenbusStateClosing); + break; + default: + BUG(); + } + break; + case XenbusStateConnected: + switch (state) { + case XenbusStateInitWait: + case XenbusStateClosing: + case XenbusStateClosed: + backend_disconnect(be); + backend_switch_state(be, XenbusStateClosing); + break; + default: + BUG(); + } + break; + case XenbusStateClosing: + switch (state) { + case XenbusStateInitWait: + case XenbusStateConnected: + case XenbusStateClosed: + backend_switch_state(be, XenbusStateClosed); + break; + default: + BUG(); + } + break; + default: + BUG(); + } } } @@ -237,40 +334,33 @@ static void frontend_changed(struct xenbus_device *dev, { struct backend_info *be = dev_get_drvdata(&dev->dev); - pr_debug("frontend state %s\n", xenbus_strstate(frontend_state)); + pr_debug("%s -> %s\n", dev->otherend, xenbus_strstate(frontend_state)); be->frontend_state = frontend_state; switch (frontend_state) { case XenbusStateInitialising: - if (dev->state == XenbusStateClosed) { - pr_info("%s: prepare for reconnect\n", dev->nodename); - xenbus_switch_state(dev, XenbusStateInitWait); - } + set_backend_state(be, XenbusStateInitWait); break; case XenbusStateInitialised: break; case XenbusStateConnected: - if (dev->state == XenbusStateConnected) - break; - if (be->vif) - connect(be); + set_backend_state(be, XenbusStateConnected); break; case XenbusStateClosing: - disconnect_backend(dev); - xenbus_switch_state(dev, XenbusStateClosing); + set_backend_state(be, XenbusStateClosing); break; case XenbusStateClosed: - xenbus_switch_state(dev, XenbusStateClosed); + set_backend_state(be, XenbusStateClosed); if (xenbus_dev_is_online(dev)) break; - destroy_backend(dev); /* fall through if not online */ case XenbusStateUnknown: + set_backend_state(be, XenbusStateClosed); device_unregister(&dev->dev); break; @@ -363,7 +453,9 @@ static void hotplug_status_changed(struct xenbus_watch *watch, if (IS_ERR(str)) return; if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) { - xenbus_switch_state(be->dev, XenbusStateConnected); + /* Complete any pending state change */ + xenbus_switch_state(be->dev, be->state); + /* Not interested in this watch anymore. */ unregister_hotplug_status_watch(be); } @@ -393,12 +485,8 @@ static void connect(struct backend_info *be) err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, hotplug_status_changed, "%s/%s", dev->nodename, "hotplug-status"); - if (err) { - /* Switch now, since we can't do a watch. */ - xenbus_switch_state(dev, XenbusStateConnected); - } else { + if (!err) be->have_hotplug_status_watch = 1; - } netif_wake_queue(be->vif->dev); } diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index 9d2009a9004d..78cc76053328 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -74,10 +74,4 @@ config OF_MTD depends on MTD def_bool y -config OF_RESERVED_MEM - depends on OF_FLATTREE && (DMA_CMA || (HAVE_GENERIC_DMA_COHERENT && HAVE_MEMBLOCK)) - def_bool y - help - Initialization code for DMA reserved memory - endmenu # OF diff --git a/drivers/of/Makefile b/drivers/of/Makefile index ed9660adad77..efd05102c405 100644 --- a/drivers/of/Makefile +++ b/drivers/of/Makefile @@ -9,4 +9,3 @@ obj-$(CONFIG_OF_MDIO) += of_mdio.o obj-$(CONFIG_OF_PCI) += of_pci.o obj-$(CONFIG_OF_PCI_IRQ) += of_pci_irq.o obj-$(CONFIG_OF_MTD) += of_mtd.o -obj-$(CONFIG_OF_RESERVED_MEM) += of_reserved_mem.o diff --git a/drivers/of/base.c b/drivers/of/base.c index 865d3f66c86b..7d4c70f859e3 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -303,10 +303,8 @@ struct device_node *of_get_cpu_node(int cpu, unsigned int *thread) struct device_node *cpun, *cpus; cpus = of_find_node_by_path("/cpus"); - if (!cpus) { - pr_warn("Missing cpus node, bailing out\n"); + if (!cpus) return NULL; - } for_each_child_of_node(cpus, cpun) { if (of_node_cmp(cpun->type, "cpu")) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 229dd9d69e18..a4fa9ad31b8f 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -18,7 +18,6 @@ #include <linux/string.h> #include <linux/errno.h> #include <linux/slab.h> -#include <linux/random.h> #include <asm/setup.h> /* for COMMAND_LINE_SIZE */ #ifdef CONFIG_PPC @@ -803,14 +802,3 @@ void __init unflatten_device_tree(void) } #endif /* CONFIG_OF_EARLY_FLATTREE */ - -/* Feed entire flattened device tree into the random pool */ -static int __init add_fdt_randomness(void) -{ - if (initial_boot_params) - add_device_randomness(initial_boot_params, - be32_to_cpu(initial_boot_params->totalsize)); - - return 0; -} -core_initcall(add_fdt_randomness); diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c deleted file mode 100644 index 0fe40c7d6904..000000000000 --- a/drivers/of/of_reserved_mem.c +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Device tree based initialization code for reserved memory. - * - * Copyright (c) 2013 Samsung Electronics Co., Ltd. - * http://www.samsung.com - * Author: Marek Szyprowski <m.szyprowski@samsung.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License or (at your optional) any later version of the license. - */ - -#include <linux/memblock.h> -#include <linux/err.h> -#include <linux/of.h> -#include <linux/of_fdt.h> -#include <linux/of_platform.h> -#include <linux/mm.h> -#include <linux/sizes.h> -#include <linux/mm_types.h> -#include <linux/dma-contiguous.h> -#include <linux/dma-mapping.h> -#include <linux/of_reserved_mem.h> - -#define MAX_RESERVED_REGIONS 16 -struct reserved_mem { - phys_addr_t base; - unsigned long size; - struct cma *cma; - char name[32]; -}; -static struct reserved_mem reserved_mem[MAX_RESERVED_REGIONS]; -static int reserved_mem_count; - -static int __init fdt_scan_reserved_mem(unsigned long node, const char *uname, - int depth, void *data) -{ - struct reserved_mem *rmem = &reserved_mem[reserved_mem_count]; - phys_addr_t base, size; - int is_cma, is_reserved; - unsigned long len; - const char *status; - __be32 *prop; - - is_cma = IS_ENABLED(CONFIG_DMA_CMA) && - of_flat_dt_is_compatible(node, "linux,contiguous-memory-region"); - is_reserved = of_flat_dt_is_compatible(node, "reserved-memory-region"); - - if (!is_reserved && !is_cma) { - /* ignore node and scan next one */ - return 0; - } - - status = of_get_flat_dt_prop(node, "status", &len); - if (status && strcmp(status, "okay") != 0) { - /* ignore disabled node nad scan next one */ - return 0; - } - - prop = of_get_flat_dt_prop(node, "reg", &len); - if (!prop || (len < (dt_root_size_cells + dt_root_addr_cells) * - sizeof(__be32))) { - pr_err("Reserved mem: node %s, incorrect \"reg\" property\n", - uname); - /* ignore node and scan next one */ - return 0; - } - base = dt_mem_next_cell(dt_root_addr_cells, &prop); - size = dt_mem_next_cell(dt_root_size_cells, &prop); - - if (!size) { - /* ignore node and scan next one */ - return 0; - } - - pr_info("Reserved mem: found %s, memory base %lx, size %ld MiB\n", - uname, (unsigned long)base, (unsigned long)size / SZ_1M); - - if (reserved_mem_count == ARRAY_SIZE(reserved_mem)) - return -ENOSPC; - - rmem->base = base; - rmem->size = size; - strlcpy(rmem->name, uname, sizeof(rmem->name)); - - if (is_cma) { - struct cma *cma; - if (dma_contiguous_reserve_area(size, base, 0, &cma) == 0) { - rmem->cma = cma; - reserved_mem_count++; - if (of_get_flat_dt_prop(node, - "linux,default-contiguous-region", - NULL)) - dma_contiguous_set_default(cma); - } - } else if (is_reserved) { - if (memblock_remove(base, size) == 0) - reserved_mem_count++; - else - pr_err("Failed to reserve memory for %s\n", uname); - } - - return 0; -} - -static struct reserved_mem *get_dma_memory_region(struct device *dev) -{ - struct device_node *node; - const char *name; - int i; - - node = of_parse_phandle(dev->of_node, "memory-region", 0); - if (!node) - return NULL; - - name = kbasename(node->full_name); - for (i = 0; i < reserved_mem_count; i++) - if (strcmp(name, reserved_mem[i].name) == 0) - return &reserved_mem[i]; - return NULL; -} - -/** - * of_reserved_mem_device_init() - assign reserved memory region to given device - * - * This function assign memory region pointed by "memory-region" device tree - * property to the given device. - */ -void of_reserved_mem_device_init(struct device *dev) -{ - struct reserved_mem *region = get_dma_memory_region(dev); - if (!region) - return; - - if (region->cma) { - dev_set_cma_area(dev, region->cma); - pr_info("Assigned CMA %s to %s device\n", region->name, - dev_name(dev)); - } else { - if (dma_declare_coherent_memory(dev, region->base, region->base, - region->size, DMA_MEMORY_MAP | DMA_MEMORY_EXCLUSIVE) != 0) - pr_info("Declared reserved memory %s to %s device\n", - region->name, dev_name(dev)); - } -} - -/** - * of_reserved_mem_device_release() - release reserved memory device structures - * - * This function releases structures allocated for memory region handling for - * the given device. - */ -void of_reserved_mem_device_release(struct device *dev) -{ - struct reserved_mem *region = get_dma_memory_region(dev); - if (!region && !region->cma) - dma_release_declared_memory(dev); -} - -/** - * early_init_dt_scan_reserved_mem() - create reserved memory regions - * - * This function grabs memory from early allocator for device exclusive use - * defined in device tree structures. It should be called by arch specific code - * once the early allocator (memblock) has been activated and all other - * subsystems have already allocated/reserved memory. - */ -void __init early_init_dt_scan_reserved_mem(void) -{ - of_scan_flat_dt_by_path("/memory/reserved-memory", - fdt_scan_reserved_mem, NULL); -} diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 9b439ac63d8e..f6dcde220821 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -21,7 +21,6 @@ #include <linux/of_device.h> #include <linux/of_irq.h> #include <linux/of_platform.h> -#include <linux/of_reserved_mem.h> #include <linux/platform_device.h> const struct of_device_id of_default_bus_match_table[] = { @@ -219,8 +218,6 @@ static struct platform_device *of_platform_device_create_pdata( dev->dev.bus = &platform_bus_type; dev->dev.platform_data = platform_data; - of_reserved_mem_device_init(&dev->dev); - /* We do not fill the DMA ops for platform devices by default. * This is currently the responsibility of the platform code * to do such, possibly using a device notifier @@ -228,7 +225,6 @@ static struct platform_device *of_platform_device_create_pdata( if (of_device_add(dev) != 0) { platform_device_put(dev); - of_reserved_mem_device_release(&dev->dev); return NULL; } diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 0b7d23b4ad95..be12fbfcae10 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -994,14 +994,16 @@ void acpiphp_enumerate_slots(struct pci_bus *bus) /* * This bridge should have been registered as a hotplug function - * under its parent, so the context has to be there. If not, we - * are in deep goo. + * under its parent, so the context should be there, unless the + * parent is going to be handled by pciehp, in which case this + * bridge is not interesting to us either. */ mutex_lock(&acpiphp_context_lock); context = acpiphp_get_context(handle); - if (WARN_ON(!context)) { + if (!context) { mutex_unlock(&acpiphp_context_lock); put_device(&bus->dev); + pci_dev_put(bridge->pci_dev); kfree(bridge); return; } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e8ccf6c0f08a..bdd64b1b4817 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1155,8 +1155,14 @@ static void pci_enable_bridge(struct pci_dev *dev) pci_enable_bridge(dev->bus->self); - if (pci_is_enabled(dev)) + if (pci_is_enabled(dev)) { + if (!dev->is_busmaster) { + dev_warn(&dev->dev, "driver skip pci_set_master, fix it!\n"); + pci_set_master(dev); + } return; + } + retval = pci_enable_device(dev); if (retval) dev_err(&dev->dev, "Error enabling bridge (%d), continuing\n", diff --git a/drivers/pinctrl/pinconf.c b/drivers/pinctrl/pinconf.c index a138965c01cb..b8fcc38c0d11 100644 --- a/drivers/pinctrl/pinconf.c +++ b/drivers/pinctrl/pinconf.c @@ -490,7 +490,7 @@ exit: * <devicename> <state> <pinname> are values that should match the pinctrl-maps * <newvalue> reflects the new config and is driver dependant */ -static int pinconf_dbg_config_write(struct file *file, +static ssize_t pinconf_dbg_config_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct pinctrl_maps *maps_node; @@ -508,7 +508,7 @@ static int pinconf_dbg_config_write(struct file *file, int i; /* Get userspace string and assure termination */ - buf_size = min(count, (size_t)(sizeof(buf)-1)); + buf_size = min(count, sizeof(buf) - 1); if (copy_from_user(buf, user_buf, buf_size)) return -EFAULT; buf[buf_size] = 0; diff --git a/drivers/pinctrl/pinctrl-exynos.c b/drivers/pinctrl/pinctrl-exynos.c index 2689f8d01a1e..155b1b3a0e7a 100644 --- a/drivers/pinctrl/pinctrl-exynos.c +++ b/drivers/pinctrl/pinctrl-exynos.c @@ -663,18 +663,18 @@ static void exynos_pinctrl_resume(struct samsung_pinctrl_drv_data *drvdata) /* pin banks of s5pv210 pin-controller */ static struct samsung_pin_bank s5pv210_pin_bank[] = { EXYNOS_PIN_BANK_EINTG(8, 0x000, "gpa0", 0x00), - EXYNOS_PIN_BANK_EINTG(6, 0x020, "gpa1", 0x04), + EXYNOS_PIN_BANK_EINTG(4, 0x020, "gpa1", 0x04), EXYNOS_PIN_BANK_EINTG(8, 0x040, "gpb", 0x08), EXYNOS_PIN_BANK_EINTG(5, 0x060, "gpc0", 0x0c), EXYNOS_PIN_BANK_EINTG(5, 0x080, "gpc1", 0x10), EXYNOS_PIN_BANK_EINTG(4, 0x0a0, "gpd0", 0x14), - EXYNOS_PIN_BANK_EINTG(4, 0x0c0, "gpd1", 0x18), - EXYNOS_PIN_BANK_EINTG(5, 0x0e0, "gpe0", 0x1c), - EXYNOS_PIN_BANK_EINTG(8, 0x100, "gpe1", 0x20), - EXYNOS_PIN_BANK_EINTG(6, 0x120, "gpf0", 0x24), + EXYNOS_PIN_BANK_EINTG(6, 0x0c0, "gpd1", 0x18), + EXYNOS_PIN_BANK_EINTG(8, 0x0e0, "gpe0", 0x1c), + EXYNOS_PIN_BANK_EINTG(5, 0x100, "gpe1", 0x20), + EXYNOS_PIN_BANK_EINTG(8, 0x120, "gpf0", 0x24), EXYNOS_PIN_BANK_EINTG(8, 0x140, "gpf1", 0x28), EXYNOS_PIN_BANK_EINTG(8, 0x160, "gpf2", 0x2c), - EXYNOS_PIN_BANK_EINTG(8, 0x180, "gpf3", 0x30), + EXYNOS_PIN_BANK_EINTG(6, 0x180, "gpf3", 0x30), EXYNOS_PIN_BANK_EINTG(7, 0x1a0, "gpg0", 0x34), EXYNOS_PIN_BANK_EINTG(7, 0x1c0, "gpg1", 0x38), EXYNOS_PIN_BANK_EINTG(7, 0x1e0, "gpg2", 0x3c), diff --git a/drivers/pinctrl/pinctrl-palmas.c b/drivers/pinctrl/pinctrl-palmas.c index 82638fac3cfa..30c4d356cb33 100644 --- a/drivers/pinctrl/pinctrl-palmas.c +++ b/drivers/pinctrl/pinctrl-palmas.c @@ -891,9 +891,10 @@ static int palmas_pinconf_set(struct pinctrl_dev *pctldev, param = pinconf_to_config_param(configs[i]); param_val = pinconf_to_config_argument(configs[i]); + if (param == PIN_CONFIG_BIAS_PULL_PIN_DEFAULT) + continue; + switch (param) { - case PIN_CONFIG_BIAS_PULL_PIN_DEFAULT: - return 0; case PIN_CONFIG_BIAS_DISABLE: case PIN_CONFIG_BIAS_PULL_UP: case PIN_CONFIG_BIAS_PULL_DOWN: diff --git a/drivers/pinctrl/pinctrl-tegra114.c b/drivers/pinctrl/pinctrl-tegra114.c index 622c4854977e..93c9e3899d5e 100644 --- a/drivers/pinctrl/pinctrl-tegra114.c +++ b/drivers/pinctrl/pinctrl-tegra114.c @@ -3,7 +3,7 @@ * * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. * - * Arthur: Pritesh Raithatha <praithatha@nvidia.com> + * Author: Pritesh Raithatha <praithatha@nvidia.com> * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -2763,7 +2763,6 @@ static struct platform_driver tegra114_pinctrl_driver = { }; module_platform_driver(tegra114_pinctrl_driver); -MODULE_ALIAS("platform:tegra114-pinctrl"); MODULE_AUTHOR("Pritesh Raithatha <praithatha@nvidia.com>"); -MODULE_DESCRIPTION("NVIDIA Tegra114 pincontrol driver"); +MODULE_DESCRIPTION("NVIDIA Tegra114 pinctrl driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/regulator/da9063-regulator.c b/drivers/regulator/da9063-regulator.c index 1a7816390773..b9f2653e4ef9 100644 --- a/drivers/regulator/da9063-regulator.c +++ b/drivers/regulator/da9063-regulator.c @@ -709,7 +709,7 @@ static struct da9063_regulators_pdata *da9063_parse_regulators_dt( struct of_regulator_match **da9063_reg_matches) { da9063_reg_matches = NULL; - return PTR_ERR(-ENODEV); + return ERR_PTR(-ENODEV); } #endif diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c index 488dfe7ce9a6..7e2b165972e6 100644 --- a/drivers/regulator/palmas-regulator.c +++ b/drivers/regulator/palmas-regulator.c @@ -201,13 +201,7 @@ static unsigned int palmas_smps_ramp_delay[4] = {0, 10000, 5000, 2500}; #define SMPS_CTRL_MODE_ECO 0x02 #define SMPS_CTRL_MODE_PWM 0x03 -/* These values are derived from the data sheet. And are the number of steps - * where there is a voltage change, the ranges at beginning and end of register - * max/min values where there are no change are ommitted. - * - * So they are basically (maxV-minV)/stepV - */ -#define PALMAS_SMPS_NUM_VOLTAGES 117 +#define PALMAS_SMPS_NUM_VOLTAGES 122 #define PALMAS_SMPS10_NUM_VOLTAGES 2 #define PALMAS_LDO_NUM_VOLTAGES 50 @@ -979,6 +973,7 @@ static int palmas_regulators_probe(struct platform_device *pdev) pmic->desc[id].min_uV = 900000; pmic->desc[id].uV_step = 50000; pmic->desc[id].linear_min_sel = 1; + pmic->desc[id].enable_time = 500; pmic->desc[id].vsel_reg = PALMAS_BASE_TO_REG(PALMAS_LDO_BASE, palmas_regs_info[id].vsel_addr); @@ -997,6 +992,11 @@ static int palmas_regulators_probe(struct platform_device *pdev) pmic->desc[id].min_uV = 450000; pmic->desc[id].uV_step = 25000; } + + /* LOD6 in vibrator mode will have enable time 2000us */ + if (pdata && pdata->ldo6_vibrator && + (id == PALMAS_REG_LDO6)) + pmic->desc[id].enable_time = 2000; } else { pmic->desc[id].n_voltages = 1; pmic->desc[id].ops = &palmas_ops_extreg; diff --git a/drivers/regulator/ti-abb-regulator.c b/drivers/regulator/ti-abb-regulator.c index d8e3e1262bc2..20c271d49dcb 100644 --- a/drivers/regulator/ti-abb-regulator.c +++ b/drivers/regulator/ti-abb-regulator.c @@ -279,8 +279,12 @@ static int ti_abb_set_opp(struct regulator_dev *rdev, struct ti_abb *abb, ti_abb_rmw(regs->opp_sel_mask, info->opp_sel, regs->control_reg, abb->base); - /* program LDO VBB vset override if needed */ - if (abb->ldo_base) + /* + * program LDO VBB vset override if needed for !bypass mode + * XXX: Do not switch sequence - for !bypass, LDO override reset *must* + * be performed *before* switch to bias mode else VBB glitches. + */ + if (abb->ldo_base && info->opp_sel != TI_ABB_NOMINAL_OPP) ti_abb_program_ldovbb(dev, abb, info); /* Initiate ABB ldo change */ @@ -295,6 +299,14 @@ static int ti_abb_set_opp(struct regulator_dev *rdev, struct ti_abb *abb, if (ret) goto out; + /* + * Reset LDO VBB vset override bypass mode + * XXX: Do not switch sequence - for bypass, LDO override reset *must* + * be performed *after* switch to bypass else VBB glitches. + */ + if (abb->ldo_base && info->opp_sel == TI_ABB_NOMINAL_OPP) + ti_abb_program_ldovbb(dev, abb, info); + out: return ret; } diff --git a/drivers/regulator/wm831x-ldo.c b/drivers/regulator/wm831x-ldo.c index 1432b26ef2e9..2205fbc2c37b 100644 --- a/drivers/regulator/wm831x-ldo.c +++ b/drivers/regulator/wm831x-ldo.c @@ -63,7 +63,7 @@ static irqreturn_t wm831x_ldo_uv_irq(int irq, void *data) */ static const struct regulator_linear_range wm831x_gp_ldo_ranges[] = { - { .min_uV = 900000, .max_uV = 1650000, .min_sel = 0, .max_sel = 14, + { .min_uV = 900000, .max_uV = 1600000, .min_sel = 0, .max_sel = 14, .uV_step = 50000 }, { .min_uV = 1700000, .max_uV = 3300000, .min_sel = 15, .max_sel = 31, .uV_step = 100000 }, @@ -332,7 +332,7 @@ static struct platform_driver wm831x_gp_ldo_driver = { */ static const struct regulator_linear_range wm831x_aldo_ranges[] = { - { .min_uV = 1000000, .max_uV = 1650000, .min_sel = 0, .max_sel = 12, + { .min_uV = 1000000, .max_uV = 1600000, .min_sel = 0, .max_sel = 12, .uV_step = 50000 }, { .min_uV = 1700000, .max_uV = 3500000, .min_sel = 13, .max_sel = 31, .uV_step = 100000 }, diff --git a/drivers/regulator/wm8350-regulator.c b/drivers/regulator/wm8350-regulator.c index 835b5f0f344e..61ca9292a429 100644 --- a/drivers/regulator/wm8350-regulator.c +++ b/drivers/regulator/wm8350-regulator.c @@ -543,7 +543,7 @@ static int wm8350_dcdc_set_suspend_mode(struct regulator_dev *rdev, } static const struct regulator_linear_range wm8350_ldo_ranges[] = { - { .min_uV = 900000, .max_uV = 1750000, .min_sel = 0, .max_sel = 15, + { .min_uV = 900000, .max_uV = 1650000, .min_sel = 0, .max_sel = 15, .uV_step = 50000 }, { .min_uV = 1800000, .max_uV = 3300000, .min_sel = 16, .max_sel = 31, .uV_step = 100000 }, diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index 8cd34bf644b3..77df9cb00688 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -145,9 +145,11 @@ bool __init sclp_has_linemode(void) if (sccb->header.response_code != 0x20) return 0; - if (sccb->sclp_send_mask & (EVTYP_MSG_MASK | EVTYP_PMSGCMD_MASK)) - return 1; - return 0; + if (!(sccb->sclp_send_mask & (EVTYP_OPCMD_MASK | EVTYP_PMSGCMD_MASK))) + return 0; + if (!(sccb->sclp_receive_mask & (EVTYP_MSG_MASK | EVTYP_PMSGCMD_MASK))) + return 0; + return 1; } bool __init sclp_has_vt220(void) diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c index a0f47c83fd62..3f4ca4e09a4c 100644 --- a/drivers/s390/char/tty3270.c +++ b/drivers/s390/char/tty3270.c @@ -810,7 +810,7 @@ static void tty3270_resize_work(struct work_struct *work) struct winsize ws; screen = tty3270_alloc_screen(tp->n_rows, tp->n_cols); - if (!screen) + if (IS_ERR(screen)) return; /* Switch to new output size */ spin_lock_bh(&tp->view.lock); diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c index fd7cc566095a..d4ac60b4a56e 100644 --- a/drivers/spi/spi-atmel.c +++ b/drivers/spi/spi-atmel.c @@ -1583,7 +1583,7 @@ static int atmel_spi_probe(struct platform_device *pdev) /* Initialize the hardware */ ret = clk_prepare_enable(clk); if (ret) - goto out_unmap_regs; + goto out_free_irq; spi_writel(as, CR, SPI_BIT(SWRST)); spi_writel(as, CR, SPI_BIT(SWRST)); /* AT91SAM9263 Rev B workaround */ if (as->caps.has_wdrbt) { @@ -1614,6 +1614,7 @@ out_free_dma: spi_writel(as, CR, SPI_BIT(SWRST)); spi_writel(as, CR, SPI_BIT(SWRST)); /* AT91SAM9263 Rev B workaround */ clk_disable_unprepare(clk); +out_free_irq: free_irq(irq, master); out_unmap_regs: iounmap(as->regs); diff --git a/drivers/spi/spi-clps711x.c b/drivers/spi/spi-clps711x.c index 5655acf55bfe..6416798828e7 100644 --- a/drivers/spi/spi-clps711x.c +++ b/drivers/spi/spi-clps711x.c @@ -226,7 +226,6 @@ static int spi_clps711x_probe(struct platform_device *pdev) dev_name(&pdev->dev), hw); if (ret) { dev_err(&pdev->dev, "Can't request IRQ\n"); - clk_put(hw->spi_clk); goto clk_out; } @@ -247,7 +246,6 @@ err_out: gpio_free(hw->chipselect[i]); spi_master_put(master); - kfree(master); return ret; } @@ -263,7 +261,6 @@ static int spi_clps711x_remove(struct platform_device *pdev) gpio_free(hw->chipselect[i]); spi_unregister_master(master); - kfree(master); return 0; } diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index 6cd07d13ecab..4e44575bd87a 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -476,15 +476,9 @@ static int dspi_probe(struct platform_device *pdev) master->bus_num = bus_num; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(&pdev->dev, "can't get platform resource\n"); - ret = -EINVAL; - goto out_master_put; - } - dspi->base = devm_ioremap_resource(&pdev->dev, res); - if (!dspi->base) { - ret = -EINVAL; + if (IS_ERR(dspi->base)) { + ret = PTR_ERR(dspi->base); goto out_master_put; } diff --git a/drivers/spi/spi-mpc512x-psc.c b/drivers/spi/spi-mpc512x-psc.c index dbc5e999a1f5..6adf4e35816d 100644 --- a/drivers/spi/spi-mpc512x-psc.c +++ b/drivers/spi/spi-mpc512x-psc.c @@ -522,8 +522,10 @@ static int mpc512x_psc_spi_do_probe(struct device *dev, u32 regaddr, psc_num = master->bus_num; snprintf(clk_name, sizeof(clk_name), "psc%d_mclk", psc_num); clk = devm_clk_get(dev, clk_name); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { + ret = PTR_ERR(clk); goto free_irq; + } ret = clk_prepare_enable(clk); if (ret) goto free_irq; diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 2eb06ee0b326..c1a50674c1e3 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -546,8 +546,17 @@ static irqreturn_t ssp_int(int irq, void *dev_id) if (pm_runtime_suspended(&drv_data->pdev->dev)) return IRQ_NONE; - sccr1_reg = read_SSCR1(reg); + /* + * If the device is not yet in RPM suspended state and we get an + * interrupt that is meant for another device, check if status bits + * are all set to one. That means that the device is already + * powered off. + */ status = read_SSSR(reg); + if (status == ~0) + return IRQ_NONE; + + sccr1_reg = read_SSCR1(reg); /* Ignore possible writes if we don't need to write */ if (!(sccr1_reg & SSCR1_TIE)) diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c index 512b8893893b..a80376dc3a10 100644 --- a/drivers/spi/spi-s3c64xx.c +++ b/drivers/spi/spi-s3c64xx.c @@ -1428,6 +1428,8 @@ static int s3c64xx_spi_probe(struct platform_device *pdev) S3C64XX_SPI_INT_TX_OVERRUN_EN | S3C64XX_SPI_INT_TX_UNDERRUN_EN, sdd->regs + S3C64XX_SPI_INT_EN); + pm_runtime_enable(&pdev->dev); + if (spi_register_master(master)) { dev_err(&pdev->dev, "cannot register SPI master\n"); ret = -EBUSY; @@ -1440,8 +1442,6 @@ static int s3c64xx_spi_probe(struct platform_device *pdev) mem_res, sdd->rx_dma.dmach, sdd->tx_dma.dmach); - pm_runtime_enable(&pdev->dev); - return 0; err3: diff --git a/drivers/spi/spi-sh-hspi.c b/drivers/spi/spi-sh-hspi.c index 0b68cb592fa4..e488a90a98b8 100644 --- a/drivers/spi/spi-sh-hspi.c +++ b/drivers/spi/spi-sh-hspi.c @@ -296,6 +296,8 @@ static int hspi_probe(struct platform_device *pdev) goto error1; } + pm_runtime_enable(&pdev->dev); + master->num_chipselect = 1; master->bus_num = pdev->id; master->setup = hspi_setup; @@ -309,8 +311,6 @@ static int hspi_probe(struct platform_device *pdev) goto error1; } - pm_runtime_enable(&pdev->dev); - return 0; error1: diff --git a/drivers/staging/comedi/drivers/ni_65xx.c b/drivers/staging/comedi/drivers/ni_65xx.c index 3ba4c5712dff..853f62b2b1a9 100644 --- a/drivers/staging/comedi/drivers/ni_65xx.c +++ b/drivers/staging/comedi/drivers/ni_65xx.c @@ -369,28 +369,23 @@ static int ni_65xx_dio_insn_bits(struct comedi_device *dev, { const struct ni_65xx_board *board = comedi_board(dev); struct ni_65xx_private *devpriv = dev->private; - unsigned base_bitfield_channel; - const unsigned max_ports_per_bitfield = 5; + int base_bitfield_channel; unsigned read_bits = 0; - unsigned j; + int last_port_offset = ni_65xx_port_by_channel(s->n_chan - 1); + int port_offset; base_bitfield_channel = CR_CHAN(insn->chanspec); - for (j = 0; j < max_ports_per_bitfield; ++j) { - const unsigned port_offset = - ni_65xx_port_by_channel(base_bitfield_channel) + j; - const unsigned port = - sprivate(s)->base_port + port_offset; - unsigned base_port_channel; + for (port_offset = ni_65xx_port_by_channel(base_bitfield_channel); + port_offset <= last_port_offset; port_offset++) { + unsigned port = sprivate(s)->base_port + port_offset; + int base_port_channel = port_offset * ni_65xx_channels_per_port; unsigned port_mask, port_data, port_read_bits; - int bitshift; - if (port >= ni_65xx_total_num_ports(board)) + int bitshift = base_port_channel - base_bitfield_channel; + + if (bitshift >= 32) break; - base_port_channel = port_offset * ni_65xx_channels_per_port; port_mask = data[0]; port_data = data[1]; - bitshift = base_port_channel - base_bitfield_channel; - if (bitshift >= 32 || bitshift <= -32) - break; if (bitshift > 0) { port_mask >>= bitshift; port_data >>= bitshift; diff --git a/drivers/staging/imx-drm/imx-drm-core.c b/drivers/staging/imx-drm/imx-drm-core.c index 47c5888461ff..a2e52a0c53c9 100644 --- a/drivers/staging/imx-drm/imx-drm-core.c +++ b/drivers/staging/imx-drm/imx-drm-core.c @@ -41,7 +41,6 @@ struct imx_drm_device { struct list_head encoder_list; struct list_head connector_list; struct mutex mutex; - int references; int pipes; struct drm_fbdev_cma *fbhelper; }; @@ -241,8 +240,6 @@ struct drm_device *imx_drm_device_get(void) } } - imxdrm->references++; - return imxdrm->drm; unwind_crtc: @@ -280,8 +277,6 @@ void imx_drm_device_put(void) list_for_each_entry(enc, &imxdrm->encoder_list, list) module_put(enc->owner); - imxdrm->references--; - mutex_unlock(&imxdrm->mutex); } EXPORT_SYMBOL_GPL(imx_drm_device_put); @@ -485,7 +480,7 @@ int imx_drm_add_crtc(struct drm_crtc *crtc, mutex_lock(&imxdrm->mutex); - if (imxdrm->references) { + if (imxdrm->drm->open_count) { ret = -EBUSY; goto err_busy; } @@ -564,7 +559,7 @@ int imx_drm_add_encoder(struct drm_encoder *encoder, mutex_lock(&imxdrm->mutex); - if (imxdrm->references) { + if (imxdrm->drm->open_count) { ret = -EBUSY; goto err_busy; } @@ -709,7 +704,7 @@ int imx_drm_add_connector(struct drm_connector *connector, mutex_lock(&imxdrm->mutex); - if (imxdrm->references) { + if (imxdrm->drm->open_count) { ret = -EBUSY; goto err_busy; } diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c index 2644edf438c1..c8b43442dc74 100644 --- a/drivers/staging/lustre/lustre/obdecho/echo_client.c +++ b/drivers/staging/lustre/lustre/obdecho/echo_client.c @@ -1387,7 +1387,7 @@ echo_copyout_lsm (struct lov_stripe_md *lsm, void *_ulsm, int ulsm_nob) if (nob > ulsm_nob) return (-EINVAL); - if (copy_to_user (ulsm, lsm, sizeof(ulsm))) + if (copy_to_user (ulsm, lsm, sizeof(*ulsm))) return (-EFAULT); for (i = 0; i < lsm->lsm_stripe_count; i++) { diff --git a/drivers/staging/octeon-usb/cvmx-usb.c b/drivers/staging/octeon-usb/cvmx-usb.c index d7b3c82b5ead..45dfe94199ae 100644 --- a/drivers/staging/octeon-usb/cvmx-usb.c +++ b/drivers/staging/octeon-usb/cvmx-usb.c @@ -604,7 +604,7 @@ int cvmx_usb_initialize(struct cvmx_usb_state *state, int usb_port_number, } } - memset(usb, 0, sizeof(usb)); + memset(usb, 0, sizeof(*usb)); usb->init_flags = flags; /* Initialize the USB state structure */ diff --git a/drivers/staging/rtl8188eu/core/rtw_mp.c b/drivers/staging/rtl8188eu/core/rtw_mp.c index c7ff2e4d1f23..9832dcbbd07f 100644 --- a/drivers/staging/rtl8188eu/core/rtw_mp.c +++ b/drivers/staging/rtl8188eu/core/rtw_mp.c @@ -907,7 +907,7 @@ u32 mp_query_psd(struct adapter *pAdapter, u8 *data) sscanf(data, "pts =%d, start =%d, stop =%d", &psd_pts, &psd_start, &psd_stop); } - _rtw_memset(data, '\0', sizeof(data)); + _rtw_memset(data, '\0', sizeof(*data)); i = psd_start; while (i < psd_stop) { diff --git a/drivers/staging/rtl8188eu/hal/rtl8188e_dm.c b/drivers/staging/rtl8188eu/hal/rtl8188e_dm.c index 9c2e7a20c09e..ec0028d4e61a 100644 --- a/drivers/staging/rtl8188eu/hal/rtl8188e_dm.c +++ b/drivers/staging/rtl8188eu/hal/rtl8188e_dm.c @@ -57,7 +57,7 @@ static void Init_ODM_ComInfo_88E(struct adapter *Adapter) u8 cut_ver, fab_ver; /* Init Value */ - _rtw_memset(dm_odm, 0, sizeof(dm_odm)); + _rtw_memset(dm_odm, 0, sizeof(*dm_odm)); dm_odm->Adapter = Adapter; diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c index cd4100fb3645..95953ebc0279 100644 --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c @@ -6973,7 +6973,7 @@ static int rtw_mp_ctx(struct net_device *dev, stop = strncmp(extra, "stop", 4); sscanf(extra, "count =%d, pkt", &count); - _rtw_memset(extra, '\0', sizeof(extra)); + _rtw_memset(extra, '\0', sizeof(*extra)); if (stop == 0) { bStartTest = 0; /* To set Stop */ diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c index d3078d200e50..9ca3180ebaa0 100644 --- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c +++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c @@ -54,6 +54,7 @@ static struct usb_device_id rtw_usb_id_tbl[] = { /*=== Customer ID ===*/ /****** 8188EUS ********/ {USB_DEVICE(0x8179, 0x07B8)}, /* Abocom - Abocom */ + {USB_DEVICE(0x2001, 0x330F)}, /* DLink DWA-125 REV D1 */ {} /* Terminating entry */ }; diff --git a/drivers/staging/rtl8192u/r819xU_cmdpkt.c b/drivers/staging/rtl8192u/r819xU_cmdpkt.c index 5bc361b16d4c..56144014b7c9 100644 --- a/drivers/staging/rtl8192u/r819xU_cmdpkt.c +++ b/drivers/staging/rtl8192u/r819xU_cmdpkt.c @@ -37,6 +37,8 @@ rt_status SendTxCommandPacket(struct net_device *dev, void *pData, u32 DataLen) /* Get TCB and local buffer from common pool. (It is shared by CmdQ, MgntQ, and USB coalesce DataQ) */ skb = dev_alloc_skb(USB_HWDESC_HEADER_LEN + DataLen + 4); + if (!skb) + return RT_STATUS_FAILURE; memcpy((unsigned char *)(skb->cb), &dev, sizeof(dev)); tcb_desc = (cb_desc *)(skb->cb + MAX_DEV_ADDR_SIZE); tcb_desc->queue_index = TXCMD_QUEUE; diff --git a/drivers/staging/vt6656/iwctl.c b/drivers/staging/vt6656/iwctl.c index d0cf7d8a20e5..8872e0f84f40 100644 --- a/drivers/staging/vt6656/iwctl.c +++ b/drivers/staging/vt6656/iwctl.c @@ -1634,6 +1634,9 @@ int iwctl_siwencodeext(struct net_device *dev, struct iw_request_info *info, if (pMgmt == NULL) return -EFAULT; + if (!(pDevice->flags & DEVICE_FLAGS_OPENED)) + return -ENODEV; + buf = kzalloc(sizeof(struct viawget_wpa_param), GFP_KERNEL); if (buf == NULL) return -ENOMEM; diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c index 536971786ae8..6f9d28182445 100644 --- a/drivers/staging/vt6656/main_usb.c +++ b/drivers/staging/vt6656/main_usb.c @@ -1098,6 +1098,8 @@ static int device_close(struct net_device *dev) memset(pMgmt->abyCurrBSSID, 0, 6); pMgmt->eCurrState = WMAC_STATE_IDLE; + pDevice->flags &= ~DEVICE_FLAGS_OPENED; + device_free_tx_bufs(pDevice); device_free_rx_bufs(pDevice); device_free_int_bufs(pDevice); @@ -1109,7 +1111,6 @@ static int device_close(struct net_device *dev) usb_free_urb(pDevice->pInterruptURB); BSSvClearNodeDBTable(pDevice, 0); - pDevice->flags &=(~DEVICE_FLAGS_OPENED); DBG_PRT(MSG_LEVEL_DEBUG, KERN_INFO "device_close2 \n"); diff --git a/drivers/staging/vt6656/rxtx.c b/drivers/staging/vt6656/rxtx.c index fb743a8811bb..14f3e852215d 100644 --- a/drivers/staging/vt6656/rxtx.c +++ b/drivers/staging/vt6656/rxtx.c @@ -148,6 +148,8 @@ static void *s_vGetFreeContext(struct vnt_private *pDevice) DBG_PRT(MSG_LEVEL_DEBUG, KERN_INFO"GetFreeContext()\n"); for (ii = 0; ii < pDevice->cbTD; ii++) { + if (!pDevice->apTD[ii]) + return NULL; pContext = pDevice->apTD[ii]; if (pContext->bBoolInUse == false) { pContext->bBoolInUse = true; diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 35b61f7d6c63..38e44b9abf0f 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -753,7 +753,8 @@ static void iscsit_unmap_iovec(struct iscsi_cmd *cmd) static void iscsit_ack_from_expstatsn(struct iscsi_conn *conn, u32 exp_statsn) { - struct iscsi_cmd *cmd; + LIST_HEAD(ack_list); + struct iscsi_cmd *cmd, *cmd_p; conn->exp_statsn = exp_statsn; @@ -761,19 +762,23 @@ static void iscsit_ack_from_expstatsn(struct iscsi_conn *conn, u32 exp_statsn) return; spin_lock_bh(&conn->cmd_lock); - list_for_each_entry(cmd, &conn->conn_cmd_list, i_conn_node) { + list_for_each_entry_safe(cmd, cmd_p, &conn->conn_cmd_list, i_conn_node) { spin_lock(&cmd->istate_lock); if ((cmd->i_state == ISTATE_SENT_STATUS) && iscsi_sna_lt(cmd->stat_sn, exp_statsn)) { cmd->i_state = ISTATE_REMOVE; spin_unlock(&cmd->istate_lock); - iscsit_add_cmd_to_immediate_queue(cmd, conn, - cmd->i_state); + list_move_tail(&cmd->i_conn_node, &ack_list); continue; } spin_unlock(&cmd->istate_lock); } spin_unlock_bh(&conn->cmd_lock); + + list_for_each_entry_safe(cmd, cmd_p, &ack_list, i_conn_node) { + list_del(&cmd->i_conn_node); + iscsit_free_cmd(cmd, false); + } } static int iscsit_allocate_iovecs(struct iscsi_cmd *cmd) diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 14d1aed5af1d..ef6d836a4d09 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -1192,7 +1192,7 @@ get_target: */ alloc_tags: tag_num = max_t(u32, ISCSIT_MIN_TAGS, queue_depth); - tag_num += ISCSIT_EXTRA_TAGS; + tag_num += (tag_num / 2) + ISCSIT_EXTRA_TAGS; tag_size = sizeof(struct iscsi_cmd) + conn->conn_transport->priv_size; ret = transport_alloc_session_tags(sess->se_sess, tag_num, tag_size); diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index f2de28e178fd..b0cac0c342e1 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -736,7 +736,7 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown) * Fallthrough */ case ISCSI_OP_SCSI_TMFUNC: - rc = transport_generic_free_cmd(&cmd->se_cmd, 1); + rc = transport_generic_free_cmd(&cmd->se_cmd, shutdown); if (!rc && shutdown && se_cmd && se_cmd->se_sess) { __iscsit_free_cmd(cmd, true, shutdown); target_put_sess_cmd(se_cmd->se_sess, se_cmd); @@ -752,7 +752,7 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown) se_cmd = &cmd->se_cmd; __iscsit_free_cmd(cmd, true, shutdown); - rc = transport_generic_free_cmd(&cmd->se_cmd, 1); + rc = transport_generic_free_cmd(&cmd->se_cmd, shutdown); if (!rc && shutdown && se_cmd->se_sess) { __iscsit_free_cmd(cmd, true, shutdown); target_put_sess_cmd(se_cmd->se_sess, se_cmd); diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 6c17295e8d7c..4714c6f8da4b 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -349,7 +349,16 @@ static sense_reason_t compare_and_write_post(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; - cmd->se_cmd_flags |= SCF_COMPARE_AND_WRITE_POST; + /* + * Only set SCF_COMPARE_AND_WRITE_POST to force a response fall-through + * within target_complete_ok_work() if the command was successfully + * sent to the backend driver. + */ + spin_lock_irq(&cmd->t_state_lock); + if ((cmd->transport_state & CMD_T_SENT) && !cmd->scsi_status) + cmd->se_cmd_flags |= SCF_COMPARE_AND_WRITE_POST; + spin_unlock_irq(&cmd->t_state_lock); + /* * Unlock ->caw_sem originally obtained during sbc_compare_and_write() * before the original READ I/O submission. @@ -363,7 +372,7 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; struct scatterlist *write_sg = NULL, *sg; - unsigned char *buf, *addr; + unsigned char *buf = NULL, *addr; struct sg_mapping_iter m; unsigned int offset = 0, len; unsigned int nlbas = cmd->t_task_nolb; @@ -378,6 +387,15 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd) */ if (!cmd->t_data_sg || !cmd->t_bidi_data_sg) return TCM_NO_SENSE; + /* + * Immediately exit + release dev->caw_sem if command has already + * been failed with a non-zero SCSI status. + */ + if (cmd->scsi_status) { + pr_err("compare_and_write_callback: non zero scsi_status:" + " 0x%02x\n", cmd->scsi_status); + goto out; + } buf = kzalloc(cmd->data_length, GFP_KERNEL); if (!buf) { @@ -508,6 +526,12 @@ sbc_compare_and_write(struct se_cmd *cmd) cmd->transport_complete_callback = NULL; return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } + /* + * Reset cmd->data_length to individual block_size in order to not + * confuse backend drivers that depend on this value matching the + * size of the I/O being submitted. + */ + cmd->data_length = cmd->t_task_nolb * dev->dev_attrib.block_size; ret = cmd->execute_rw(cmd, cmd->t_bidi_data_sg, cmd->t_bidi_data_nents, DMA_FROM_DEVICE); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 84747cc1aac0..81e945eefbbd 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -236,17 +236,24 @@ int transport_alloc_session_tags(struct se_session *se_sess, { int rc; - se_sess->sess_cmd_map = kzalloc(tag_num * tag_size, GFP_KERNEL); + se_sess->sess_cmd_map = kzalloc(tag_num * tag_size, + GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); if (!se_sess->sess_cmd_map) { - pr_err("Unable to allocate se_sess->sess_cmd_map\n"); - return -ENOMEM; + se_sess->sess_cmd_map = vzalloc(tag_num * tag_size); + if (!se_sess->sess_cmd_map) { + pr_err("Unable to allocate se_sess->sess_cmd_map\n"); + return -ENOMEM; + } } rc = percpu_ida_init(&se_sess->sess_tag_pool, tag_num); if (rc < 0) { pr_err("Unable to init se_sess->sess_tag_pool," " tag_num: %u\n", tag_num); - kfree(se_sess->sess_cmd_map); + if (is_vmalloc_addr(se_sess->sess_cmd_map)) + vfree(se_sess->sess_cmd_map); + else + kfree(se_sess->sess_cmd_map); se_sess->sess_cmd_map = NULL; return -ENOMEM; } @@ -412,7 +419,10 @@ void transport_free_session(struct se_session *se_sess) { if (se_sess->sess_cmd_map) { percpu_ida_destroy(&se_sess->sess_tag_pool); - kfree(se_sess->sess_cmd_map); + if (is_vmalloc_addr(se_sess->sess_cmd_map)) + vfree(se_sess->sess_cmd_map); + else + kfree(se_sess->sess_cmd_map); } kmem_cache_free(se_sess_cache, se_sess); } diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 4d22e7d2adca..3da4fd10b9f8 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -298,8 +298,8 @@ static int target_xcopy_parse_segdesc_02(struct se_cmd *se_cmd, struct xcopy_op (unsigned long long)xop->dst_lba); if (dc != 0) { - xop->dbl = (desc[29] << 16) & 0xff; - xop->dbl |= (desc[30] << 8) & 0xff; + xop->dbl = (desc[29] & 0xff) << 16; + xop->dbl |= (desc[30] & 0xff) << 8; xop->dbl |= desc[31] & 0xff; pr_debug("XCOPY seg desc 0x02: DC=1 w/ dbl: %u\n", xop->dbl); diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index e61c36cbb866..c193af6a628f 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -636,6 +636,7 @@ struct console xenboot_console = { .name = "xenboot", .write = xenboot_write_console, .flags = CON_PRINTBUFFER | CON_BOOT | CON_ANYTIME, + .index = -1, }; #endif /* CONFIG_EARLY_PRINTK */ diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index c9a9ddd1d0bc..7a744b69c3d1 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -1758,8 +1758,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old) canon_change = (old->c_lflag ^ tty->termios.c_lflag) & ICANON; if (canon_change) { bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE); - ldata->line_start = 0; - ldata->canon_head = ldata->read_tail; + ldata->line_start = ldata->canon_head = ldata->read_tail; ldata->erasing = 0; ldata->lnext = 0; } @@ -2184,28 +2183,34 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file, if (!input_available_p(tty, 0)) { if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) { - retval = -EIO; - break; - } - if (tty_hung_up_p(file)) - break; - if (!timeout) - break; - if (file->f_flags & O_NONBLOCK) { - retval = -EAGAIN; - break; - } - if (signal_pending(current)) { - retval = -ERESTARTSYS; - break; - } - n_tty_set_room(tty); - up_read(&tty->termios_rwsem); + up_read(&tty->termios_rwsem); + tty_flush_to_ldisc(tty); + down_read(&tty->termios_rwsem); + if (!input_available_p(tty, 0)) { + retval = -EIO; + break; + } + } else { + if (tty_hung_up_p(file)) + break; + if (!timeout) + break; + if (file->f_flags & O_NONBLOCK) { + retval = -EAGAIN; + break; + } + if (signal_pending(current)) { + retval = -ERESTARTSYS; + break; + } + n_tty_set_room(tty); + up_read(&tty->termios_rwsem); - timeout = schedule_timeout(timeout); + timeout = schedule_timeout(timeout); - down_read(&tty->termios_rwsem); - continue; + down_read(&tty->termios_rwsem); + continue; + } } __set_current_state(TASK_RUNNING); diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c index 52379e56a31e..44077c0b7670 100644 --- a/drivers/tty/serial/pch_uart.c +++ b/drivers/tty/serial/pch_uart.c @@ -667,30 +667,21 @@ static int pop_tx_x(struct eg20t_port *priv, unsigned char *buf) static int dma_push_rx(struct eg20t_port *priv, int size) { - struct tty_struct *tty; int room; struct uart_port *port = &priv->port; struct tty_port *tport = &port->state->port; - port = &priv->port; - tty = tty_port_tty_get(tport); - if (!tty) { - dev_dbg(priv->port.dev, "%s:tty is busy now", __func__); - return 0; - } - room = tty_buffer_request_room(tport, size); if (room < size) dev_warn(port->dev, "Rx overrun: dropping %u bytes\n", size - room); if (!room) - return room; + return 0; tty_insert_flip_string(tport, sg_virt(&priv->sg_rx), size); port->icount.rx += room; - tty_kref_put(tty); return room; } @@ -1098,6 +1089,8 @@ static void pch_uart_err_ir(struct eg20t_port *priv, unsigned int lsr) if (tty == NULL) { for (i = 0; error_msg[i] != NULL; i++) dev_err(&priv->pdev->dev, error_msg[i]); + } else { + tty_kref_put(tty); } } diff --git a/drivers/tty/serial/serial-tegra.c b/drivers/tty/serial/serial-tegra.c index d0d972f7e43e..0489a2bdcdf9 100644 --- a/drivers/tty/serial/serial-tegra.c +++ b/drivers/tty/serial/serial-tegra.c @@ -732,7 +732,7 @@ static irqreturn_t tegra_uart_isr(int irq, void *data) static void tegra_uart_stop_rx(struct uart_port *u) { struct tegra_uart_port *tup = to_tegra_uport(u); - struct tty_struct *tty = tty_port_tty_get(&tup->uport.state->port); + struct tty_struct *tty; struct tty_port *port = &u->state->port; struct dma_tx_state state; unsigned long ier; @@ -744,6 +744,8 @@ static void tegra_uart_stop_rx(struct uart_port *u) if (!tup->rx_in_progress) return; + tty = tty_port_tty_get(&tup->uport.state->port); + tegra_uart_wait_sym_time(tup, 1); /* wait a character interval */ ier = tup->ier_shadow; diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index 03ba081c5772..6fd60fece6b4 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -1201,6 +1201,9 @@ int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file, } return 0; case TCFLSH: + retval = tty_check_change(tty); + if (retval) + return retval; return __tty_perform_flush(tty, arg); default: /* Try the mode commands */ diff --git a/drivers/usb/chipidea/Kconfig b/drivers/usb/chipidea/Kconfig index 4a851e15e58c..77b47d82c9a6 100644 --- a/drivers/usb/chipidea/Kconfig +++ b/drivers/usb/chipidea/Kconfig @@ -1,6 +1,6 @@ config USB_CHIPIDEA tristate "ChipIdea Highspeed Dual Role Controller" - depends on (USB_EHCI_HCD && USB_GADGET) || (USB_EHCI_HCD && !USB_GADGET) || (!USB_EHCI_HCD && USB_GADGET) + depends on ((USB_EHCI_HCD && USB_GADGET) || (USB_EHCI_HCD && !USB_GADGET) || (!USB_EHCI_HCD && USB_GADGET)) && HAS_DMA help Say Y here if your system has a dual role high speed USB controller based on ChipIdea silicon IP. Currently, only the diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index 74d998d9b45b..be822a2c1776 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -131,7 +131,7 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "usbmisc init failed, ret=%d\n", ret); - goto err_clk; + goto err_phy; } } @@ -143,7 +143,7 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) dev_err(&pdev->dev, "Can't register ci_hdrc platform device, err=%d\n", ret); - goto err_clk; + goto err_phy; } if (data->usbmisc_data) { @@ -164,6 +164,9 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) disable_device: ci_hdrc_remove_device(data->ci_pdev); +err_phy: + if (data->phy) + usb_phy_shutdown(data->phy); err_clk: clk_disable_unprepare(data->clk); return ret; diff --git a/drivers/usb/chipidea/ci_hdrc_pci.c b/drivers/usb/chipidea/ci_hdrc_pci.c index 042320a6c6c7..d514332ac081 100644 --- a/drivers/usb/chipidea/ci_hdrc_pci.c +++ b/drivers/usb/chipidea/ci_hdrc_pci.c @@ -129,7 +129,12 @@ static DEFINE_PCI_DEVICE_TABLE(ci_hdrc_pci_id_table) = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x0829), .driver_data = (kernel_ulong_t)&penwell_pci_platdata, }, - { 0, 0, 0, 0, 0, 0, 0 /* end: all zeroes */ } + { + /* Intel Clovertrail */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe006), + .driver_data = (kernel_ulong_t)&penwell_pci_platdata, + }, + { 0 } /* end: all zeroes */ }; MODULE_DEVICE_TABLE(pci, ci_hdrc_pci_id_table); diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 94626409559a..23763dcec069 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -605,6 +605,7 @@ static int ci_hdrc_remove(struct platform_device *pdev) dbg_remove_files(ci); free_irq(ci->irq, ci); ci_role_destroy(ci); + kfree(ci->hw_bank.regmap); return 0; } diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 6b4c2f2eb946..9333083dd111 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -1600,6 +1600,8 @@ static void destroy_eps(struct ci_hdrc *ci) for (i = 0; i < ci->hw_ep_max; i++) { struct ci_hw_ep *hwep = &ci->ci_hw_ep[i]; + if (hwep->pending_td) + free_pending_td(hwep); dma_pool_free(ci->qh_pool, hwep->qh.ptr, hwep->qh.dma); } } @@ -1667,13 +1669,13 @@ static int ci_udc_stop(struct usb_gadget *gadget, if (ci->platdata->notify_event) ci->platdata->notify_event(ci, CI_HDRC_CONTROLLER_STOPPED_EVENT); - ci->driver = NULL; spin_unlock_irqrestore(&ci->lock, flags); _gadget_stop_activity(&ci->gadget); spin_lock_irqsave(&ci->lock, flags); pm_runtime_put(&ci->gadget.dev); } + ci->driver = NULL; spin_unlock_irqrestore(&ci->lock, flags); return 0; diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 737e3c19967b..71dc5d768fa5 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -742,6 +742,22 @@ static int check_ctrlrecip(struct dev_state *ps, unsigned int requesttype, if ((index & ~USB_DIR_IN) == 0) return 0; ret = findintfep(ps->dev, index); + if (ret < 0) { + /* + * Some not fully compliant Win apps seem to get + * index wrong and have the endpoint number here + * rather than the endpoint address (with the + * correct direction). Win does let this through, + * so we'll not reject it here but leave it to + * the device to not break KVM. But we warn. + */ + ret = findintfep(ps->dev, index ^ 0x80); + if (ret >= 0) + dev_info(&ps->dev->dev, + "%s: process %i (%s) requesting ep %02x but needs %02x\n", + __func__, task_pid_nr(current), + current->comm, index, index ^ 0x80); + } if (ret >= 0) ret = checkintf(ps, ret); break; diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index dde4c83516a1..e6b682c6c236 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -3426,6 +3426,9 @@ static int usb_req_set_sel(struct usb_device *udev, enum usb3_link_state state) unsigned long long u2_pel; int ret; + if (udev->state != USB_STATE_CONFIGURED) + return 0; + /* Convert SEL and PEL stored in ns to us */ u1_sel = DIV_ROUND_UP(udev->u1_params.sel, 1000); u1_pel = DIV_ROUND_UP(udev->u1_params.pel, 1000); diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 997ebe420bc9..2e252aae51ca 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -29,6 +29,7 @@ #define PCI_VENDOR_ID_SYNOPSYS 0x16c3 #define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3 0xabcd #define PCI_DEVICE_ID_INTEL_BYT 0x0f37 +#define PCI_DEVICE_ID_INTEL_MRFLD 0x119e struct dwc3_pci { struct device *dev; @@ -189,6 +190,7 @@ static DEFINE_PCI_DEVICE_TABLE(dwc3_pci_id_table) = { PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BYT), }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MRFLD), }, { } /* Terminating Entry */ }; MODULE_DEVICE_TABLE(pci, dwc3_pci_id_table); diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c index 1a66c5baa0d1..44cf775a8627 100644 --- a/drivers/usb/gadget/f_fs.c +++ b/drivers/usb/gadget/f_fs.c @@ -1034,37 +1034,19 @@ struct ffs_sb_fill_data { struct ffs_file_perms perms; umode_t root_mode; const char *dev_name; - union { - /* set by ffs_fs_mount(), read by ffs_sb_fill() */ - void *private_data; - /* set by ffs_sb_fill(), read by ffs_fs_mount */ - struct ffs_data *ffs_data; - }; + struct ffs_data *ffs_data; }; static int ffs_sb_fill(struct super_block *sb, void *_data, int silent) { struct ffs_sb_fill_data *data = _data; struct inode *inode; - struct ffs_data *ffs; + struct ffs_data *ffs = data->ffs_data; ENTER(); - /* Initialise data */ - ffs = ffs_data_new(); - if (unlikely(!ffs)) - goto Enomem; - ffs->sb = sb; - ffs->dev_name = kstrdup(data->dev_name, GFP_KERNEL); - if (unlikely(!ffs->dev_name)) - goto Enomem; - ffs->file_perms = data->perms; - ffs->private_data = data->private_data; - - /* used by the caller of this function */ - data->ffs_data = ffs; - + data->ffs_data = NULL; sb->s_fs_info = ffs; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; @@ -1080,17 +1062,14 @@ static int ffs_sb_fill(struct super_block *sb, void *_data, int silent) &data->perms); sb->s_root = d_make_root(inode); if (unlikely(!sb->s_root)) - goto Enomem; + return -ENOMEM; /* EP0 file */ if (unlikely(!ffs_sb_create_file(sb, "ep0", ffs, &ffs_ep0_operations, NULL))) - goto Enomem; + return -ENOMEM; return 0; - -Enomem: - return -ENOMEM; } static int ffs_fs_parse_opts(struct ffs_sb_fill_data *data, char *opts) @@ -1193,6 +1172,7 @@ ffs_fs_mount(struct file_system_type *t, int flags, struct dentry *rv; int ret; void *ffs_dev; + struct ffs_data *ffs; ENTER(); @@ -1200,18 +1180,30 @@ ffs_fs_mount(struct file_system_type *t, int flags, if (unlikely(ret < 0)) return ERR_PTR(ret); + ffs = ffs_data_new(); + if (unlikely(!ffs)) + return ERR_PTR(-ENOMEM); + ffs->file_perms = data.perms; + + ffs->dev_name = kstrdup(dev_name, GFP_KERNEL); + if (unlikely(!ffs->dev_name)) { + ffs_data_put(ffs); + return ERR_PTR(-ENOMEM); + } + ffs_dev = functionfs_acquire_dev_callback(dev_name); - if (IS_ERR(ffs_dev)) - return ffs_dev; + if (IS_ERR(ffs_dev)) { + ffs_data_put(ffs); + return ERR_CAST(ffs_dev); + } + ffs->private_data = ffs_dev; + data.ffs_data = ffs; - data.dev_name = dev_name; - data.private_data = ffs_dev; rv = mount_nodev(t, flags, &data, ffs_sb_fill); - - /* data.ffs_data is set by ffs_sb_fill */ - if (IS_ERR(rv)) + if (IS_ERR(rv) && data.ffs_data) { functionfs_release_dev_callback(data.ffs_data); - + ffs_data_put(data.ffs_data); + } return rv; } @@ -2264,6 +2256,8 @@ static int ffs_func_bind(struct usb_configuration *c, data->raw_descs + ret, (sizeof data->raw_descs) - ret, __ffs_func_bind_do_descs, func); + if (unlikely(ret < 0)) + goto error; } /* diff --git a/drivers/usb/gadget/pxa25x_udc.c b/drivers/usb/gadget/pxa25x_udc.c index cc9207473dbc..0ac6064aa3b8 100644 --- a/drivers/usb/gadget/pxa25x_udc.c +++ b/drivers/usb/gadget/pxa25x_udc.c @@ -2054,7 +2054,7 @@ static struct pxa25x_udc memory = { /* * probe - binds to the platform device */ -static int __init pxa25x_udc_probe(struct platform_device *pdev) +static int pxa25x_udc_probe(struct platform_device *pdev) { struct pxa25x_udc *dev = &memory; int retval, irq; @@ -2203,7 +2203,7 @@ static void pxa25x_udc_shutdown(struct platform_device *_dev) pullup_off(); } -static int __exit pxa25x_udc_remove(struct platform_device *pdev) +static int pxa25x_udc_remove(struct platform_device *pdev) { struct pxa25x_udc *dev = platform_get_drvdata(pdev); @@ -2294,7 +2294,8 @@ static int pxa25x_udc_resume(struct platform_device *dev) static struct platform_driver udc_driver = { .shutdown = pxa25x_udc_shutdown, - .remove = __exit_p(pxa25x_udc_remove), + .probe = pxa25x_udc_probe, + .remove = pxa25x_udc_remove, .suspend = pxa25x_udc_suspend, .resume = pxa25x_udc_resume, .driver = { @@ -2303,7 +2304,7 @@ static struct platform_driver udc_driver = { }, }; -module_platform_driver_probe(udc_driver, pxa25x_udc_probe); +module_platform_driver(udc_driver); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_AUTHOR("Frank Becker, Robert Schwebel, David Brownell"); diff --git a/drivers/usb/gadget/s3c-hsotg.c b/drivers/usb/gadget/s3c-hsotg.c index 6bddf1aa2347..a8a99e4748d5 100644 --- a/drivers/usb/gadget/s3c-hsotg.c +++ b/drivers/usb/gadget/s3c-hsotg.c @@ -543,7 +543,7 @@ static int s3c_hsotg_write_fifo(struct s3c_hsotg *hsotg, * FIFO, requests of >512 cause the endpoint to get stuck with a * fragment of the end of the transfer in it. */ - if (can_write > 512) + if (can_write > 512 && !periodic) can_write = 512; /* diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c index 4449f565d6c6..f2407b2e8a99 100644 --- a/drivers/usb/host/ehci-fsl.c +++ b/drivers/usb/host/ehci-fsl.c @@ -130,7 +130,7 @@ static int usb_hcd_fsl_probe(const struct hc_driver *driver, } /* Enable USB controller, 83xx or 8536 */ - if (pdata->have_sysif_regs) + if (pdata->have_sysif_regs && pdata->controller_ver < FSL_USB_VER_1_6) setbits32(hcd->regs + FSL_SOC_USB_CTRL, 0x4); /* Don't need to set host mode here. It will be done by tdi_reset() */ @@ -232,15 +232,9 @@ static int ehci_fsl_setup_phy(struct usb_hcd *hcd, case FSL_USB2_PHY_ULPI: if (pdata->have_sysif_regs && pdata->controller_ver) { /* controller version 1.6 or above */ + clrbits32(non_ehci + FSL_SOC_USB_CTRL, UTMI_PHY_EN); setbits32(non_ehci + FSL_SOC_USB_CTRL, - ULPI_PHY_CLK_SEL); - /* - * Due to controller issue of PHY_CLK_VALID in ULPI - * mode, we set USB_CTRL_USB_EN before checking - * PHY_CLK_VALID, otherwise PHY_CLK_VALID doesn't work. - */ - clrsetbits_be32(non_ehci + FSL_SOC_USB_CTRL, - UTMI_PHY_EN, USB_CTRL_USB_EN); + ULPI_PHY_CLK_SEL | USB_CTRL_USB_EN); } portsc |= PORT_PTS_ULPI; break; @@ -270,8 +264,9 @@ static int ehci_fsl_setup_phy(struct usb_hcd *hcd, if (pdata->have_sysif_regs && pdata->controller_ver && (phy_mode == FSL_USB2_PHY_ULPI)) { /* check PHY_CLK_VALID to get phy clk valid */ - if (!spin_event_timeout(in_be32(non_ehci + FSL_SOC_USB_CTRL) & - PHY_CLK_VALID, FSL_USB_PHY_CLK_TIMEOUT, 0)) { + if (!(spin_event_timeout(in_be32(non_ehci + FSL_SOC_USB_CTRL) & + PHY_CLK_VALID, FSL_USB_PHY_CLK_TIMEOUT, 0) || + in_be32(non_ehci + FSL_SOC_USB_PRICTRL))) { printk(KERN_WARNING "fsl-ehci: USB PHY clock invalid\n"); return -EINVAL; } diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c index 6bd299e61f58..854c2ec7b699 100644 --- a/drivers/usb/host/ehci-pci.c +++ b/drivers/usb/host/ehci-pci.c @@ -361,7 +361,7 @@ static struct pci_driver ehci_pci_driver = { .remove = usb_hcd_pci_remove, .shutdown = usb_hcd_pci_shutdown, -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_PM .driver = { .pm = &usb_hcd_pci_pm_ops }, diff --git a/drivers/usb/host/imx21-hcd.c b/drivers/usb/host/imx21-hcd.c index 60a5de505ca1..adb01d950a16 100644 --- a/drivers/usb/host/imx21-hcd.c +++ b/drivers/usb/host/imx21-hcd.c @@ -824,13 +824,13 @@ static int imx21_hc_urb_enqueue_isoc(struct usb_hcd *hcd, i = DIV_ROUND_UP(wrap_frame( cur_frame - urb->start_frame), urb->interval); - if (urb->transfer_flags & URB_ISO_ASAP) { + + /* Treat underruns as if URB_ISO_ASAP was set */ + if ((urb->transfer_flags & URB_ISO_ASAP) || + i >= urb->number_of_packets) { urb->start_frame = wrap_frame(urb->start_frame + i * urb->interval); i = 0; - } else if (i >= urb->number_of_packets) { - ret = -EXDEV; - goto alloc_dmem_failed; } } } diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index 8f6b695af6a4..604cad1bcf9c 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -216,31 +216,26 @@ static int ohci_urb_enqueue ( frame &= ~(ed->interval - 1); frame |= ed->branch; urb->start_frame = frame; + ed->last_iso = frame + ed->interval * (size - 1); } } else if (ed->type == PIPE_ISOCHRONOUS) { u16 next = ohci_frame_no(ohci) + 1; u16 frame = ed->last_iso + ed->interval; + u16 length = ed->interval * (size - 1); /* Behind the scheduling threshold? */ if (unlikely(tick_before(frame, next))) { - /* USB_ISO_ASAP: Round up to the first available slot */ + /* URB_ISO_ASAP: Round up to the first available slot */ if (urb->transfer_flags & URB_ISO_ASAP) { frame += (next - frame + ed->interval - 1) & -ed->interval; /* - * Not ASAP: Use the next slot in the stream. If - * the entire URB falls before the threshold, fail. + * Not ASAP: Use the next slot in the stream, + * no matter what. */ } else { - if (tick_before(frame + ed->interval * - (urb->number_of_packets - 1), next)) { - retval = -EXDEV; - usb_hcd_unlink_urb_from_ep(hcd, urb); - goto fail; - } - /* * Some OHCI hardware doesn't handle late TDs * correctly. After retiring them it proceeds @@ -251,9 +246,16 @@ static int ohci_urb_enqueue ( urb_priv->td_cnt = DIV_ROUND_UP( (u16) (next - frame), ed->interval); + if (urb_priv->td_cnt >= urb_priv->length) { + ++urb_priv->td_cnt; /* Mark it */ + ohci_dbg(ohci, "iso underrun %p (%u+%u < %u)\n", + urb, frame, length, + next); + } } } urb->start_frame = frame; + ed->last_iso = frame + length; } /* fill the TDs and link them to the ed; and diff --git a/drivers/usb/host/ohci-q.c b/drivers/usb/host/ohci-q.c index df4a6707322d..e7f577e63624 100644 --- a/drivers/usb/host/ohci-q.c +++ b/drivers/usb/host/ohci-q.c @@ -41,9 +41,13 @@ finish_urb(struct ohci_hcd *ohci, struct urb *urb, int status) __releases(ohci->lock) __acquires(ohci->lock) { - struct device *dev = ohci_to_hcd(ohci)->self.controller; + struct device *dev = ohci_to_hcd(ohci)->self.controller; + struct usb_host_endpoint *ep = urb->ep; + struct urb_priv *urb_priv; + // ASSERT (urb->hcpriv != 0); + restart: urb_free_priv (ohci, urb->hcpriv); urb->hcpriv = NULL; if (likely(status == -EINPROGRESS)) @@ -80,6 +84,21 @@ __acquires(ohci->lock) ohci->hc_control &= ~(OHCI_CTRL_PLE|OHCI_CTRL_IE); ohci_writel (ohci, ohci->hc_control, &ohci->regs->control); } + + /* + * An isochronous URB that is sumitted too late won't have any TDs + * (marked by the fact that the td_cnt value is larger than the + * actual number of TDs). If the next URB on this endpoint is like + * that, give it back now. + */ + if (!list_empty(&ep->urb_list)) { + urb = list_first_entry(&ep->urb_list, struct urb, urb_list); + urb_priv = urb->hcpriv; + if (urb_priv->td_cnt > urb_priv->length) { + status = 0; + goto restart; + } + } } @@ -546,7 +565,6 @@ td_fill (struct ohci_hcd *ohci, u32 info, td->hwCBP = cpu_to_hc32 (ohci, data & 0xFFFFF000); *ohci_hwPSWp(ohci, td, 0) = cpu_to_hc16 (ohci, (data & 0x0FFF) | 0xE000); - td->ed->last_iso = info & 0xffff; } else { td->hwCBP = cpu_to_hc32 (ohci, data); } @@ -996,7 +1014,7 @@ rescan_this: urb_priv->td_cnt++; /* if URB is done, clean up */ - if (urb_priv->td_cnt == urb_priv->length) { + if (urb_priv->td_cnt >= urb_priv->length) { modified = completed = 1; finish_urb(ohci, urb, 0); } @@ -1086,7 +1104,7 @@ static void takeback_td(struct ohci_hcd *ohci, struct td *td) urb_priv->td_cnt++; /* If all this urb's TDs are done, call complete() */ - if (urb_priv->td_cnt == urb_priv->length) + if (urb_priv->td_cnt >= urb_priv->length) finish_urb(ohci, urb, status); /* clean schedule: unlink EDs that are no longer busy */ diff --git a/drivers/usb/host/uhci-pci.c b/drivers/usb/host/uhci-pci.c index c300bd2f7d1c..0f228c46eeda 100644 --- a/drivers/usb/host/uhci-pci.c +++ b/drivers/usb/host/uhci-pci.c @@ -293,7 +293,7 @@ static struct pci_driver uhci_pci_driver = { .remove = usb_hcd_pci_remove, .shutdown = uhci_shutdown, -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_PM .driver = { .pm = &usb_hcd_pci_pm_ops }, diff --git a/drivers/usb/host/uhci-q.c b/drivers/usb/host/uhci-q.c index 041c6ddb695c..da6f56d996ce 100644 --- a/drivers/usb/host/uhci-q.c +++ b/drivers/usb/host/uhci-q.c @@ -1303,7 +1303,7 @@ static int uhci_submit_isochronous(struct uhci_hcd *uhci, struct urb *urb, } /* Fell behind? */ - if (uhci_frame_before_eq(frame, next)) { + if (!uhci_frame_before_eq(next, frame)) { /* USB_ISO_ASAP: Round up to the first available slot */ if (urb->transfer_flags & URB_ISO_ASAP) @@ -1311,13 +1311,17 @@ static int uhci_submit_isochronous(struct uhci_hcd *uhci, struct urb *urb, -qh->period; /* - * Not ASAP: Use the next slot in the stream. If - * the entire URB falls before the threshold, fail. + * Not ASAP: Use the next slot in the stream, + * no matter what. */ else if (!uhci_frame_before_eq(next, frame + (urb->number_of_packets - 1) * qh->period)) - return -EXDEV; + dev_dbg(uhci_dev(uhci), "iso underrun %p (%u+%u < %u)\n", + urb, frame, + (urb->number_of_packets - 1) * + qh->period, + next); } } diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index fae697ed0b70..773a6b28c4f1 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -287,7 +287,7 @@ static int xhci_stop_device(struct xhci_hcd *xhci, int slot_id, int suspend) if (virt_dev->eps[i].ring && virt_dev->eps[i].ring->dequeue) xhci_queue_stop_endpoint(xhci, slot_id, i, suspend); } - cmd->command_trb = xhci->cmd_ring->enqueue; + cmd->command_trb = xhci_find_next_enqueue(xhci->cmd_ring); list_add_tail(&cmd->cmd_list, &virt_dev->cmd_list); xhci_queue_stop_endpoint(xhci, slot_id, 0, suspend); xhci_ring_cmd_db(xhci); @@ -552,11 +552,15 @@ void xhci_del_comp_mod_timer(struct xhci_hcd *xhci, u32 status, u16 wIndex) * - Mark a port as being done with device resume, * and ring the endpoint doorbells. * - Stop the Synopsys redriver Compliance Mode polling. + * - Drop and reacquire the xHCI lock, in order to wait for port resume. */ static u32 xhci_get_port_status(struct usb_hcd *hcd, struct xhci_bus_state *bus_state, __le32 __iomem **port_array, - u16 wIndex, u32 raw_port_status) + u16 wIndex, u32 raw_port_status, + unsigned long flags) + __releases(&xhci->lock) + __acquires(&xhci->lock) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); u32 status = 0; @@ -591,21 +595,42 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd, return 0xffffffff; if (time_after_eq(jiffies, bus_state->resume_done[wIndex])) { + int time_left; + xhci_dbg(xhci, "Resume USB2 port %d\n", wIndex + 1); bus_state->resume_done[wIndex] = 0; clear_bit(wIndex, &bus_state->resuming_ports); + + set_bit(wIndex, &bus_state->rexit_ports); xhci_set_link_state(xhci, port_array, wIndex, XDEV_U0); - xhci_dbg(xhci, "set port %d resume\n", - wIndex + 1); - slot_id = xhci_find_slot_id_by_port(hcd, xhci, - wIndex + 1); - if (!slot_id) { - xhci_dbg(xhci, "slot_id is zero\n"); - return 0xffffffff; + + spin_unlock_irqrestore(&xhci->lock, flags); + time_left = wait_for_completion_timeout( + &bus_state->rexit_done[wIndex], + msecs_to_jiffies( + XHCI_MAX_REXIT_TIMEOUT)); + spin_lock_irqsave(&xhci->lock, flags); + + if (time_left) { + slot_id = xhci_find_slot_id_by_port(hcd, + xhci, wIndex + 1); + if (!slot_id) { + xhci_dbg(xhci, "slot_id is zero\n"); + return 0xffffffff; + } + xhci_ring_device(xhci, slot_id); + } else { + int port_status = xhci_readl(xhci, + port_array[wIndex]); + xhci_warn(xhci, "Port resume took longer than %i msec, port status = 0x%x\n", + XHCI_MAX_REXIT_TIMEOUT, + port_status); + status |= USB_PORT_STAT_SUSPEND; + clear_bit(wIndex, &bus_state->rexit_ports); } - xhci_ring_device(xhci, slot_id); + bus_state->port_c_suspend |= 1 << wIndex; bus_state->suspended_ports &= ~(1 << wIndex); } else { @@ -728,7 +753,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, break; } status = xhci_get_port_status(hcd, bus_state, port_array, - wIndex, temp); + wIndex, temp, flags); if (status == 0xffffffff) goto error; diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 53b972c2a09f..83bcd13622c3 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2428,6 +2428,8 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) for (i = 0; i < USB_MAXCHILDREN; ++i) { xhci->bus_state[0].resume_done[i] = 0; xhci->bus_state[1].resume_done[i] = 0; + /* Only the USB 2.0 completions will ever be used. */ + init_completion(&xhci->bus_state[1].rexit_done[i]); } if (scratchpad_alloc(xhci, flags)) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index c2d495057eb5..236c3aabe940 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -351,7 +351,7 @@ static struct pci_driver xhci_pci_driver = { /* suspend and resume implemented later */ .shutdown = usb_hcd_pci_shutdown, -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_PM .driver = { .pm = &usb_hcd_pci_pm_ops }, diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 411da1fc7ae8..6bfbd80ec2b9 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -123,6 +123,16 @@ static int enqueue_is_link_trb(struct xhci_ring *ring) return TRB_TYPE_LINK_LE32(link->control); } +union xhci_trb *xhci_find_next_enqueue(struct xhci_ring *ring) +{ + /* Enqueue pointer can be left pointing to the link TRB, + * we must handle that + */ + if (TRB_TYPE_LINK_LE32(ring->enqueue->link.control)) + return ring->enq_seg->next->trbs; + return ring->enqueue; +} + /* Updates trb to point to the next TRB in the ring, and updates seg if the next * TRB is in a new segment. This does not skip over link TRBs, and it does not * effect the ring dequeue or enqueue pointers. @@ -859,8 +869,12 @@ remove_finished_td: /* Otherwise ring the doorbell(s) to restart queued transfers */ ring_doorbell_for_active_rings(xhci, slot_id, ep_index); } - ep->stopped_td = NULL; - ep->stopped_trb = NULL; + + /* Clear stopped_td and stopped_trb if endpoint is not halted */ + if (!(ep->ep_state & EP_HALTED)) { + ep->stopped_td = NULL; + ep->stopped_trb = NULL; + } /* * Drop the lock and complete the URBs in the cancelled TD list. @@ -1414,6 +1428,12 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, inc_deq(xhci, xhci->cmd_ring); return; } + /* There is no command to handle if we get a stop event when the + * command ring is empty, event->cmd_trb points to the next + * unset command + */ + if (xhci->cmd_ring->dequeue == xhci->cmd_ring->enqueue) + return; } switch (le32_to_cpu(xhci->cmd_ring->dequeue->generic.field[3]) @@ -1743,6 +1763,19 @@ static void handle_port_status(struct xhci_hcd *xhci, } } + /* + * Check to see if xhci-hub.c is waiting on RExit to U0 transition (or + * RExit to a disconnect state). If so, let the the driver know it's + * out of the RExit state. + */ + if (!DEV_SUPERSPEED(temp) && + test_and_clear_bit(faked_port_index, + &bus_state->rexit_ports)) { + complete(&bus_state->rexit_done[faked_port_index]); + bogus_port_status = true; + goto cleanup; + } + if (hcd->speed != HCD_USB3) xhci_test_and_clear_bit(xhci, port_array, faked_port_index, PORT_PLC); diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 49b6edb84a79..1e36dbb48366 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -2598,15 +2598,7 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci, if (command) { cmd_completion = command->completion; cmd_status = &command->status; - command->command_trb = xhci->cmd_ring->enqueue; - - /* Enqueue pointer can be left pointing to the link TRB, - * we must handle that - */ - if (TRB_TYPE_LINK_LE32(command->command_trb->link.control)) - command->command_trb = - xhci->cmd_ring->enq_seg->next->trbs; - + command->command_trb = xhci_find_next_enqueue(xhci->cmd_ring); list_add_tail(&command->cmd_list, &virt_dev->cmd_list); } else { cmd_completion = &virt_dev->cmd_completion; @@ -2614,7 +2606,7 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci, } init_completion(cmd_completion); - cmd_trb = xhci->cmd_ring->dequeue; + cmd_trb = xhci_find_next_enqueue(xhci->cmd_ring); if (!ctx_change) ret = xhci_queue_configure_endpoint(xhci, in_ctx->dma, udev->slot_id, must_succeed); @@ -3439,14 +3431,7 @@ int xhci_discover_or_reset_device(struct usb_hcd *hcd, struct usb_device *udev) /* Attempt to submit the Reset Device command to the command ring */ spin_lock_irqsave(&xhci->lock, flags); - reset_device_cmd->command_trb = xhci->cmd_ring->enqueue; - - /* Enqueue pointer can be left pointing to the link TRB, - * we must handle that - */ - if (TRB_TYPE_LINK_LE32(reset_device_cmd->command_trb->link.control)) - reset_device_cmd->command_trb = - xhci->cmd_ring->enq_seg->next->trbs; + reset_device_cmd->command_trb = xhci_find_next_enqueue(xhci->cmd_ring); list_add_tail(&reset_device_cmd->cmd_list, &virt_dev->cmd_list); ret = xhci_queue_reset_device(xhci, slot_id); @@ -3650,7 +3635,7 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev) union xhci_trb *cmd_trb; spin_lock_irqsave(&xhci->lock, flags); - cmd_trb = xhci->cmd_ring->dequeue; + cmd_trb = xhci_find_next_enqueue(xhci->cmd_ring); ret = xhci_queue_slot_control(xhci, TRB_ENABLE_SLOT, 0); if (ret) { spin_unlock_irqrestore(&xhci->lock, flags); @@ -3785,7 +3770,7 @@ int xhci_address_device(struct usb_hcd *hcd, struct usb_device *udev) slot_ctx->dev_info >> 27); spin_lock_irqsave(&xhci->lock, flags); - cmd_trb = xhci->cmd_ring->dequeue; + cmd_trb = xhci_find_next_enqueue(xhci->cmd_ring); ret = xhci_queue_address_device(xhci, virt_dev->in_ctx->dma, udev->slot_id); if (ret) { diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 46aa14894148..289fbfbae746 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1412,8 +1412,18 @@ struct xhci_bus_state { unsigned long resume_done[USB_MAXCHILDREN]; /* which ports have started to resume */ unsigned long resuming_ports; + /* Which ports are waiting on RExit to U0 transition. */ + unsigned long rexit_ports; + struct completion rexit_done[USB_MAXCHILDREN]; }; + +/* + * It can take up to 20 ms to transition from RExit to U0 on the + * Intel Lynx Point LP xHCI host. + */ +#define XHCI_MAX_REXIT_TIMEOUT (20 * 1000) + static inline unsigned int hcd_index(struct usb_hcd *hcd) { if (hcd->speed == HCD_USB3) @@ -1840,6 +1850,7 @@ int xhci_cancel_cmd(struct xhci_hcd *xhci, struct xhci_command *command, union xhci_trb *cmd_trb); void xhci_ring_ep_doorbell(struct xhci_hcd *xhci, unsigned int slot_id, unsigned int ep_index, unsigned int stream_id); +union xhci_trb *xhci_find_next_enqueue(struct xhci_ring *ring); /* xHCI roothub code */ void xhci_set_link_state(struct xhci_hcd *xhci, __le32 __iomem **port_array, diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index 4047cbb91bac..bd4138d80a48 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -535,6 +535,9 @@ static int dsps_probe(struct platform_device *pdev) struct dsps_glue *glue; int ret; + if (!strcmp(pdev->name, "musb-hdrc")) + return -ENODEV; + match = of_match_node(musb_dsps_of_match, pdev->dev.of_node); if (!match) { dev_err(&pdev->dev, "fail to get matching of_match struct\n"); diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 9a08679d204d..b19ed213ab85 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1790,6 +1790,10 @@ int musb_gadget_setup(struct musb *musb) musb->g.max_speed = USB_SPEED_HIGH; musb->g.speed = USB_SPEED_UNKNOWN; + MUSB_DEV_MODE(musb); + musb->xceiv->otg->default_a = 0; + musb->xceiv->state = OTG_STATE_B_IDLE; + /* this "gadget" abstracts/virtualizes the controller */ musb->g.name = musb_driver_name; musb->g.is_otg = 1; @@ -1849,7 +1853,6 @@ static int musb_gadget_start(struct usb_gadget *g, musb->gadget_driver = driver; spin_lock_irqsave(&musb->lock, flags); - musb->is_active = 1; otg_set_peripheral(otg, &musb->g); musb->xceiv->state = OTG_STATE_B_IDLE; diff --git a/drivers/usb/phy/phy-gpio-vbus-usb.c b/drivers/usb/phy/phy-gpio-vbus-usb.c index b2f29c9aebbf..02799a5efcd4 100644 --- a/drivers/usb/phy/phy-gpio-vbus-usb.c +++ b/drivers/usb/phy/phy-gpio-vbus-usb.c @@ -241,7 +241,7 @@ static int gpio_vbus_set_suspend(struct usb_phy *phy, int suspend) /* platform driver interface */ -static int __init gpio_vbus_probe(struct platform_device *pdev) +static int gpio_vbus_probe(struct platform_device *pdev) { struct gpio_vbus_mach_info *pdata = dev_get_platdata(&pdev->dev); struct gpio_vbus_data *gpio_vbus; @@ -349,7 +349,7 @@ err_gpio: return err; } -static int __exit gpio_vbus_remove(struct platform_device *pdev) +static int gpio_vbus_remove(struct platform_device *pdev) { struct gpio_vbus_data *gpio_vbus = platform_get_drvdata(pdev); struct gpio_vbus_mach_info *pdata = dev_get_platdata(&pdev->dev); @@ -398,8 +398,6 @@ static const struct dev_pm_ops gpio_vbus_dev_pm_ops = { }; #endif -/* NOTE: the gpio-vbus device may *NOT* be hotplugged */ - MODULE_ALIAS("platform:gpio-vbus"); static struct platform_driver gpio_vbus_driver = { @@ -410,10 +408,11 @@ static struct platform_driver gpio_vbus_driver = { .pm = &gpio_vbus_dev_pm_ops, #endif }, - .remove = __exit_p(gpio_vbus_remove), + .probe = gpio_vbus_probe, + .remove = gpio_vbus_remove, }; -module_platform_driver_probe(gpio_vbus_driver, gpio_vbus_probe); +module_platform_driver(gpio_vbus_driver); MODULE_DESCRIPTION("simple GPIO controlled OTG transceiver driver"); MODULE_AUTHOR("Philipp Zabel"); diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 1cf6f125f5f0..80a7104d5ddb 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -81,6 +81,7 @@ static void option_instat_callback(struct urb *urb); #define HUAWEI_VENDOR_ID 0x12D1 #define HUAWEI_PRODUCT_E173 0x140C +#define HUAWEI_PRODUCT_E1750 0x1406 #define HUAWEI_PRODUCT_K4505 0x1464 #define HUAWEI_PRODUCT_K3765 0x1465 #define HUAWEI_PRODUCT_K4605 0x14C6 @@ -567,6 +568,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0x1c23, USB_CLASS_COMM, 0x02, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E173, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t) &net_intf1_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1750, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t) &net_intf2_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0x1441, USB_CLASS_COMM, 0x02, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0x1442, USB_CLASS_COMM, 0x02, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4505, 0xff, 0xff, 0xff), diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index a9807dea3887..4fb7a8f83c8a 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -545,6 +545,8 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, long npage; int ret = 0, prot = 0; uint64_t mask; + struct vfio_dma *dma = NULL; + unsigned long pfn; end = map->iova + map->size; @@ -587,8 +589,6 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, } for (iova = map->iova; iova < end; iova += size, vaddr += size) { - struct vfio_dma *dma = NULL; - unsigned long pfn; long i; /* Pin a contiguous chunk of memory */ @@ -597,16 +597,15 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, if (npage <= 0) { WARN_ON(!npage); ret = (int)npage; - break; + goto out; } /* Verify pages are not already mapped */ for (i = 0; i < npage; i++) { if (iommu_iova_to_phys(iommu->domain, iova + (i << PAGE_SHIFT))) { - vfio_unpin_pages(pfn, npage, prot, true); ret = -EBUSY; - break; + goto out_unpin; } } @@ -616,8 +615,7 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, if (ret) { if (ret != -EBUSY || map_try_harder(iommu, iova, pfn, npage, prot)) { - vfio_unpin_pages(pfn, npage, prot, true); - break; + goto out_unpin; } } @@ -672,9 +670,8 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, dma = kzalloc(sizeof(*dma), GFP_KERNEL); if (!dma) { iommu_unmap(iommu->domain, iova, size); - vfio_unpin_pages(pfn, npage, prot, true); ret = -ENOMEM; - break; + goto out_unpin; } dma->size = size; @@ -685,16 +682,21 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, } } - if (ret) { - struct vfio_dma *tmp; - iova = map->iova; - size = map->size; - while ((tmp = vfio_find_dma(iommu, iova, size))) { - int r = vfio_remove_dma_overlap(iommu, iova, - &size, tmp); - if (WARN_ON(r || !size)) - break; - } + WARN_ON(ret); + mutex_unlock(&iommu->lock); + return ret; + +out_unpin: + vfio_unpin_pages(pfn, npage, prot, true); + +out: + iova = map->iova; + size = map->size; + while ((dma = vfio_find_dma(iommu, iova, size))) { + int r = vfio_remove_dma_overlap(iommu, iova, + &size, dma); + if (WARN_ON(r || !size)) + break; } mutex_unlock(&iommu->lock); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 592b31698fc8..ce5221fa393a 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -728,7 +728,12 @@ vhost_scsi_get_tag(struct vhost_virtqueue *vq, } se_sess = tv_nexus->tvn_se_sess; - tag = percpu_ida_alloc(&se_sess->sess_tag_pool, GFP_KERNEL); + tag = percpu_ida_alloc(&se_sess->sess_tag_pool, GFP_ATOMIC); + if (tag < 0) { + pr_err("Unable to obtain tag for tcm_vhost_cmd\n"); + return ERR_PTR(-ENOMEM); + } + cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[tag]; sg = cmd->tvc_sgl; pages = cmd->tvc_upages; diff --git a/drivers/video/mmp/hw/mmp_ctrl.c b/drivers/video/mmp/hw/mmp_ctrl.c index 75dca19bf214..6ac755270ab4 100644 --- a/drivers/video/mmp/hw/mmp_ctrl.c +++ b/drivers/video/mmp/hw/mmp_ctrl.c @@ -514,7 +514,7 @@ static int mmphw_probe(struct platform_device *pdev) if (IS_ERR(ctrl->clk)) { dev_err(ctrl->dev, "unable to get clk %s\n", mi->clk_name); ret = -ENOENT; - goto failed_get_clk; + goto failed; } clk_prepare_enable(ctrl->clk); @@ -551,21 +551,8 @@ failed_path_init: path_deinit(path_plat); } - if (ctrl->clk) { - devm_clk_put(ctrl->dev, ctrl->clk); - clk_disable_unprepare(ctrl->clk); - } -failed_get_clk: - devm_free_irq(ctrl->dev, ctrl->irq, ctrl); + clk_disable_unprepare(ctrl->clk); failed: - if (ctrl) { - if (ctrl->reg_base) - devm_iounmap(ctrl->dev, ctrl->reg_base); - devm_release_mem_region(ctrl->dev, res->start, - resource_size(res)); - devm_kfree(ctrl->dev, ctrl); - } - dev_err(&pdev->dev, "device init failed\n"); return ret; diff --git a/drivers/video/mxsfb.c b/drivers/video/mxsfb.c index d250ed0f806d..27197a8048c0 100644 --- a/drivers/video/mxsfb.c +++ b/drivers/video/mxsfb.c @@ -620,6 +620,7 @@ static int mxsfb_restore_mode(struct mxsfb_info *host) break; case 3: bits_per_pixel = 32; + break; case 1: default: return -EINVAL; diff --git a/drivers/video/neofb.c b/drivers/video/neofb.c index 7ef079c146e7..c172a5281f9e 100644 --- a/drivers/video/neofb.c +++ b/drivers/video/neofb.c @@ -2075,6 +2075,7 @@ static int neofb_probe(struct pci_dev *dev, const struct pci_device_id *id) if (!fb_find_mode(&info->var, info, mode_option, NULL, 0, info->monspecs.modedb, 16)) { printk(KERN_ERR "neofb: Unable to find usable video mode.\n"); + err = -EINVAL; goto err_map_video; } @@ -2097,7 +2098,8 @@ static int neofb_probe(struct pci_dev *dev, const struct pci_device_id *id) info->fix.smem_len >> 10, info->var.xres, info->var.yres, h_sync / 1000, h_sync % 1000, v_sync); - if (fb_alloc_cmap(&info->cmap, 256, 0) < 0) + err = fb_alloc_cmap(&info->cmap, 256, 0); + if (err < 0) goto err_map_video; err = register_framebuffer(info); diff --git a/drivers/video/of_display_timing.c b/drivers/video/of_display_timing.c index 171821ddd78d..ba5b40f581f6 100644 --- a/drivers/video/of_display_timing.c +++ b/drivers/video/of_display_timing.c @@ -120,7 +120,7 @@ int of_get_display_timing(struct device_node *np, const char *name, return -EINVAL; } - timing_np = of_find_node_by_name(np, name); + timing_np = of_get_child_by_name(np, name); if (!timing_np) { pr_err("%s: could not find node '%s'\n", of_node_full_name(np), name); @@ -143,11 +143,11 @@ struct display_timings *of_get_display_timings(struct device_node *np) struct display_timings *disp; if (!np) { - pr_err("%s: no devicenode given\n", of_node_full_name(np)); + pr_err("%s: no device node given\n", of_node_full_name(np)); return NULL; } - timings_np = of_find_node_by_name(np, "display-timings"); + timings_np = of_get_child_by_name(np, "display-timings"); if (!timings_np) { pr_err("%s: could not find display-timings node\n", of_node_full_name(np)); diff --git a/drivers/video/omap2/displays-new/Kconfig b/drivers/video/omap2/displays-new/Kconfig index 6c90885b0940..10b25e7cd878 100644 --- a/drivers/video/omap2/displays-new/Kconfig +++ b/drivers/video/omap2/displays-new/Kconfig @@ -35,6 +35,7 @@ config DISPLAY_PANEL_DPI config DISPLAY_PANEL_DSI_CM tristate "Generic DSI Command Mode Panel" + depends on BACKLIGHT_CLASS_DEVICE help Driver for generic DSI command mode panels. diff --git a/drivers/video/omap2/displays-new/connector-analog-tv.c b/drivers/video/omap2/displays-new/connector-analog-tv.c index 1b60698f141e..ccd9073f706f 100644 --- a/drivers/video/omap2/displays-new/connector-analog-tv.c +++ b/drivers/video/omap2/displays-new/connector-analog-tv.c @@ -191,7 +191,7 @@ static int tvc_probe_pdata(struct platform_device *pdev) in = omap_dss_find_output(pdata->source); if (in == NULL) { dev_err(&pdev->dev, "Failed to find video source\n"); - return -ENODEV; + return -EPROBE_DEFER; } ddata->in = in; diff --git a/drivers/video/omap2/displays-new/connector-dvi.c b/drivers/video/omap2/displays-new/connector-dvi.c index bc5f8ceda371..63d88ee6dfe4 100644 --- a/drivers/video/omap2/displays-new/connector-dvi.c +++ b/drivers/video/omap2/displays-new/connector-dvi.c @@ -263,7 +263,7 @@ static int dvic_probe_pdata(struct platform_device *pdev) in = omap_dss_find_output(pdata->source); if (in == NULL) { dev_err(&pdev->dev, "Failed to find video source\n"); - return -ENODEV; + return -EPROBE_DEFER; } ddata->in = in; diff --git a/drivers/video/omap2/displays-new/connector-hdmi.c b/drivers/video/omap2/displays-new/connector-hdmi.c index c5826716d6ab..9abe2c039ae9 100644 --- a/drivers/video/omap2/displays-new/connector-hdmi.c +++ b/drivers/video/omap2/displays-new/connector-hdmi.c @@ -290,7 +290,7 @@ static int hdmic_probe_pdata(struct platform_device *pdev) in = omap_dss_find_output(pdata->source); if (in == NULL) { dev_err(&pdev->dev, "Failed to find video source\n"); - return -ENODEV; + return -EPROBE_DEFER; } ddata->in = in; diff --git a/drivers/video/omap2/dss/dispc.c b/drivers/video/omap2/dss/dispc.c index 02a7340111df..477975009eee 100644 --- a/drivers/video/omap2/dss/dispc.c +++ b/drivers/video/omap2/dss/dispc.c @@ -3691,6 +3691,7 @@ static int __init omap_dispchw_probe(struct platform_device *pdev) } pm_runtime_enable(&pdev->dev); + pm_runtime_irq_safe(&pdev->dev); r = dispc_runtime_get(); if (r) diff --git a/drivers/video/s3fb.c b/drivers/video/s3fb.c index 47ca86c5c6c0..d838ba829459 100644 --- a/drivers/video/s3fb.c +++ b/drivers/video/s3fb.c @@ -1336,14 +1336,7 @@ static int s3_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) (info->var.bits_per_pixel * info->var.xres_virtual); if (info->var.yres_virtual < info->var.yres) { dev_err(info->device, "virtual vertical size smaller than real\n"); - goto err_find_mode; - } - - /* maximize virtual vertical size for fast scrolling */ - info->var.yres_virtual = info->fix.smem_len * 8 / - (info->var.bits_per_pixel * info->var.xres_virtual); - if (info->var.yres_virtual < info->var.yres) { - dev_err(info->device, "virtual vertical size smaller than real\n"); + rc = -EINVAL; goto err_find_mode; } diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index 5be5e3d14f79..19f3c3fc65f4 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -802,6 +802,12 @@ static int hpwdt_init_one(struct pci_dev *dev, return -ENODEV; } + /* + * Ignore all auxilary iLO devices with the following PCI ID + */ + if (dev->subsystem_device == 0x1979) + return -ENODEV; + if (pci_enable_device(dev)) { dev_warn(&dev->dev, "Not possible to enable PCI Device: 0x%x:0x%x.\n", diff --git a/drivers/watchdog/kempld_wdt.c b/drivers/watchdog/kempld_wdt.c index 491419e0772a..5c3d4df63e68 100644 --- a/drivers/watchdog/kempld_wdt.c +++ b/drivers/watchdog/kempld_wdt.c @@ -35,7 +35,7 @@ #define KEMPLD_WDT_STAGE_TIMEOUT(x) (0x1b + (x) * 4) #define KEMPLD_WDT_STAGE_CFG(x) (0x18 + (x)) #define STAGE_CFG_GET_PRESCALER(x) (((x) & 0x30) >> 4) -#define STAGE_CFG_SET_PRESCALER(x) (((x) & 0x30) << 4) +#define STAGE_CFG_SET_PRESCALER(x) (((x) & 0x3) << 4) #define STAGE_CFG_PRESCALER_MASK 0x30 #define STAGE_CFG_ACTION_MASK 0x7 #define STAGE_CFG_ASSERT (1 << 3) diff --git a/drivers/watchdog/sunxi_wdt.c b/drivers/watchdog/sunxi_wdt.c index 1f94b42764aa..f6caa77151c7 100644 --- a/drivers/watchdog/sunxi_wdt.c +++ b/drivers/watchdog/sunxi_wdt.c @@ -146,7 +146,7 @@ static const struct watchdog_ops sunxi_wdt_ops = { .set_timeout = sunxi_wdt_set_timeout, }; -static int __init sunxi_wdt_probe(struct platform_device *pdev) +static int sunxi_wdt_probe(struct platform_device *pdev) { struct sunxi_wdt_dev *sunxi_wdt; struct resource *res; @@ -187,7 +187,7 @@ static int __init sunxi_wdt_probe(struct platform_device *pdev) return 0; } -static int __exit sunxi_wdt_remove(struct platform_device *pdev) +static int sunxi_wdt_remove(struct platform_device *pdev) { struct sunxi_wdt_dev *sunxi_wdt = platform_get_drvdata(pdev); diff --git a/drivers/watchdog/ts72xx_wdt.c b/drivers/watchdog/ts72xx_wdt.c index 42913f131dc2..c9b0c627fe7e 100644 --- a/drivers/watchdog/ts72xx_wdt.c +++ b/drivers/watchdog/ts72xx_wdt.c @@ -310,7 +310,8 @@ static long ts72xx_wdt_ioctl(struct file *file, unsigned int cmd, case WDIOC_GETSTATUS: case WDIOC_GETBOOTSTATUS: - return put_user(0, p); + error = put_user(0, p); + break; case WDIOC_KEEPALIVE: ts72xx_wdt_kick(wdt); diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index a50c6e3a7cc4..b232908a6192 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -398,8 +398,6 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) if (nr_pages > ARRAY_SIZE(frame_list)) nr_pages = ARRAY_SIZE(frame_list); - scratch_page = get_balloon_scratch_page(); - for (i = 0; i < nr_pages; i++) { page = alloc_page(gfp); if (page == NULL) { @@ -413,6 +411,12 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) scrub_page(page); + /* + * Ballooned out frames are effectively replaced with + * a scratch frame. Ensure direct mappings and the + * p2m are consistent. + */ + scratch_page = get_balloon_scratch_page(); #ifdef CONFIG_XEN_HAVE_PVMMU if (xen_pv_domain() && !PageHighMem(page)) { ret = HYPERVISOR_update_va_mapping( @@ -422,24 +426,19 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) BUG_ON(ret); } #endif - } - - /* Ensure that ballooned highmem pages don't have kmaps. */ - kmap_flush_unused(); - flush_tlb_all(); - - /* No more mappings: invalidate P2M and add to balloon. */ - for (i = 0; i < nr_pages; i++) { - pfn = mfn_to_pfn(frame_list[i]); if (!xen_feature(XENFEAT_auto_translated_physmap)) { unsigned long p; p = page_to_pfn(scratch_page); __set_phys_to_machine(pfn, pfn_to_mfn(p)); } + put_balloon_scratch_page(); + balloon_append(pfn_to_page(pfn)); } - put_balloon_scratch_page(); + /* Ensure that ballooned highmem pages don't have kmaps. */ + kmap_flush_unused(); + flush_tlb_all(); set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 646337dc5201..529300327f45 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -600,9 +600,6 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) /* lock down the parent dentry so we can peer at it */ parent = dget_parent(dentry); - if (!parent->d_inode) - goto out_bad; - dir = AFS_FS_I(parent->d_inode); /* validate the parent directory */ @@ -167,10 +167,25 @@ static int __init aio_setup(void) } __initcall(aio_setup); +static void put_aio_ring_file(struct kioctx *ctx) +{ + struct file *aio_ring_file = ctx->aio_ring_file; + if (aio_ring_file) { + truncate_setsize(aio_ring_file->f_inode, 0); + + /* Prevent further access to the kioctx from migratepages */ + spin_lock(&aio_ring_file->f_inode->i_mapping->private_lock); + aio_ring_file->f_inode->i_mapping->private_data = NULL; + ctx->aio_ring_file = NULL; + spin_unlock(&aio_ring_file->f_inode->i_mapping->private_lock); + + fput(aio_ring_file); + } +} + static void aio_free_ring(struct kioctx *ctx) { int i; - struct file *aio_ring_file = ctx->aio_ring_file; for (i = 0; i < ctx->nr_pages; i++) { pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i, @@ -178,14 +193,10 @@ static void aio_free_ring(struct kioctx *ctx) put_page(ctx->ring_pages[i]); } + put_aio_ring_file(ctx); + if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) kfree(ctx->ring_pages); - - if (aio_ring_file) { - truncate_setsize(aio_ring_file->f_inode, 0); - fput(aio_ring_file); - ctx->aio_ring_file = NULL; - } } static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma) @@ -207,9 +218,8 @@ static int aio_set_page_dirty(struct page *page) static int aio_migratepage(struct address_space *mapping, struct page *new, struct page *old, enum migrate_mode mode) { - struct kioctx *ctx = mapping->private_data; + struct kioctx *ctx; unsigned long flags; - unsigned idx = old->index; int rc; /* Writeback must be complete */ @@ -224,10 +234,23 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, get_page(new); - spin_lock_irqsave(&ctx->completion_lock, flags); - migrate_page_copy(new, old); - ctx->ring_pages[idx] = new; - spin_unlock_irqrestore(&ctx->completion_lock, flags); + /* We can potentially race against kioctx teardown here. Use the + * address_space's private data lock to protect the mapping's + * private_data. + */ + spin_lock(&mapping->private_lock); + ctx = mapping->private_data; + if (ctx) { + pgoff_t idx; + spin_lock_irqsave(&ctx->completion_lock, flags); + migrate_page_copy(new, old); + idx = old->index; + if (idx < (pgoff_t)ctx->nr_pages) + ctx->ring_pages[idx] = new; + spin_unlock_irqrestore(&ctx->completion_lock, flags); + } else + rc = -EBUSY; + spin_unlock(&mapping->private_lock); return rc; } @@ -617,8 +640,7 @@ out_freepcpu: out_freeref: free_percpu(ctx->users.pcpu_count); out_freectx: - if (ctx->aio_ring_file) - fput(ctx->aio_ring_file); + put_aio_ring_file(ctx); kmem_cache_free(kioctx_cachep, ctx); pr_debug("error allocating ioctx %d\n", err); return ERR_PTR(err); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 100edcc5e312..4c94a79991bb 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1413,7 +1413,7 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata, * long file_ofs * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL... */ -static void fill_files_note(struct memelfnote *note) +static int fill_files_note(struct memelfnote *note) { struct vm_area_struct *vma; unsigned count, size, names_ofs, remaining, n; @@ -1428,11 +1428,11 @@ static void fill_files_note(struct memelfnote *note) names_ofs = (2 + 3 * count) * sizeof(data[0]); alloc: if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */ - goto err; + return -EINVAL; size = round_up(size, PAGE_SIZE); data = vmalloc(size); if (!data) - goto err; + return -ENOMEM; start_end_ofs = data + 2; name_base = name_curpos = ((char *)data) + names_ofs; @@ -1485,7 +1485,7 @@ static void fill_files_note(struct memelfnote *note) size = name_curpos - (char *)data; fill_note(note, "CORE", NT_FILE, size, data); - err: ; + return 0; } #ifdef CORE_DUMP_USE_REGSET @@ -1686,8 +1686,8 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, fill_auxv_note(&info->auxv, current->mm); info->size += notesize(&info->auxv); - fill_files_note(&info->files); - info->size += notesize(&info->files); + if (fill_files_note(&info->files) == 0) + info->size += notesize(&info->files); return 1; } @@ -1719,7 +1719,8 @@ static int write_note_info(struct elf_note_info *info, return 0; if (first && !writenote(&info->auxv, file, foffset)) return 0; - if (first && !writenote(&info->files, file, foffset)) + if (first && info->files.data && + !writenote(&info->files, file, foffset)) return 0; for (i = 1; i < info->thread_notes; ++i) @@ -1806,6 +1807,7 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t) struct elf_note_info { struct memelfnote *notes; + struct memelfnote *notes_files; struct elf_prstatus *prstatus; /* NT_PRSTATUS */ struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */ struct list_head thread_list; @@ -1896,9 +1898,12 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo); fill_auxv_note(info->notes + 3, current->mm); - fill_files_note(info->notes + 4); + info->numnote = 4; - info->numnote = 5; + if (fill_files_note(info->notes + info->numnote) == 0) { + info->notes_files = info->notes + info->numnote; + info->numnote++; + } /* Try to dump the FPU. */ info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs, @@ -1960,8 +1965,9 @@ static void free_note_info(struct elf_note_info *info) kfree(list_entry(tmp, struct elf_thread_status, list)); } - /* Free data allocated by fill_files_note(): */ - vfree(info->notes[4].data); + /* Free data possibly allocated by fill_files_note(): */ + if (info->notes_files) + vfree(info->notes_files->data); kfree(info->prstatus); kfree(info->psinfo); @@ -2044,7 +2050,7 @@ static int elf_core_dump(struct coredump_params *cprm) struct vm_area_struct *vma, *gate_vma; struct elfhdr *elf = NULL; loff_t offset = 0, dataoff, foffset; - struct elf_note_info info; + struct elf_note_info info = { }; struct elf_phdr *phdr4note = NULL; struct elf_shdr *shdr4extnum = NULL; Elf_Half e_phnum; @@ -917,8 +917,8 @@ void bio_copy_data(struct bio *dst, struct bio *src) src_p = kmap_atomic(src_bv->bv_page); dst_p = kmap_atomic(dst_bv->bv_page); - memcpy(dst_p + dst_bv->bv_offset, - src_p + src_bv->bv_offset, + memcpy(dst_p + dst_offset, + src_p + src_offset, bytes); kunmap_atomic(dst_p); diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 58b7d14b08ee..08cc08f037a6 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -107,7 +107,8 @@ static void check_idle_worker(struct btrfs_worker_thread *worker) worker->idle = 1; /* the list may be empty if the worker is just starting */ - if (!list_empty(&worker->worker_list)) { + if (!list_empty(&worker->worker_list) && + !worker->workers->stopping) { list_move(&worker->worker_list, &worker->workers->idle_list); } @@ -127,7 +128,8 @@ static void check_busy_worker(struct btrfs_worker_thread *worker) spin_lock_irqsave(&worker->workers->lock, flags); worker->idle = 0; - if (!list_empty(&worker->worker_list)) { + if (!list_empty(&worker->worker_list) && + !worker->workers->stopping) { list_move_tail(&worker->worker_list, &worker->workers->worker_list); } @@ -412,6 +414,7 @@ void btrfs_stop_workers(struct btrfs_workers *workers) int can_stop; spin_lock_irq(&workers->lock); + workers->stopping = 1; list_splice_init(&workers->idle_list, &workers->worker_list); while (!list_empty(&workers->worker_list)) { cur = workers->worker_list.next; @@ -455,6 +458,7 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max, workers->ordered = 0; workers->atomic_start_pending = 0; workers->atomic_worker_start = async_helper; + workers->stopping = 0; } /* @@ -480,15 +484,19 @@ static int __btrfs_start_workers(struct btrfs_workers *workers) atomic_set(&worker->num_pending, 0); atomic_set(&worker->refs, 1); worker->workers = workers; - worker->task = kthread_run(worker_loop, worker, - "btrfs-%s-%d", workers->name, - workers->num_workers + 1); + worker->task = kthread_create(worker_loop, worker, + "btrfs-%s-%d", workers->name, + workers->num_workers + 1); if (IS_ERR(worker->task)) { ret = PTR_ERR(worker->task); - kfree(worker); goto fail; } + spin_lock_irq(&workers->lock); + if (workers->stopping) { + spin_unlock_irq(&workers->lock); + goto fail_kthread; + } list_add_tail(&worker->worker_list, &workers->idle_list); worker->idle = 1; workers->num_workers++; @@ -496,8 +504,13 @@ static int __btrfs_start_workers(struct btrfs_workers *workers) WARN_ON(workers->num_workers_starting < 0); spin_unlock_irq(&workers->lock); + wake_up_process(worker->task); return 0; + +fail_kthread: + kthread_stop(worker->task); fail: + kfree(worker); spin_lock_irq(&workers->lock); workers->num_workers_starting--; spin_unlock_irq(&workers->lock); diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 063698b90ce2..1f26792683ed 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -107,6 +107,8 @@ struct btrfs_workers { /* extra name for this worker, used for current->name */ char *name; + + int stopping; }; void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 70681686e8dc..9efb94e95858 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -535,10 +535,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); btrfs_rm_dev_replace_srcdev(fs_info, src_device); - if (src_device->bdev) { - /* zero out the old super */ - btrfs_scratch_superblock(src_device); - } + /* * this is again a consistent state where no dev_replace procedure * is running, the target device is part of the filesystem, the diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4ae17ed13b32..62176ad89846 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1561,8 +1561,9 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, return ret; } -struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, - struct btrfs_key *location) +struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *location, + bool check_ref) { struct btrfs_root *root; int ret; @@ -1586,7 +1587,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, again: root = btrfs_lookup_fs_root(fs_info, location->objectid); if (root) { - if (btrfs_root_refs(&root->root_item) == 0) + if (check_ref && btrfs_root_refs(&root->root_item) == 0) return ERR_PTR(-ENOENT); return root; } @@ -1595,7 +1596,7 @@ again: if (IS_ERR(root)) return root; - if (btrfs_root_refs(&root->root_item) == 0) { + if (check_ref && btrfs_root_refs(&root->root_item) == 0) { ret = -ENOENT; goto fail; } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index b71acd6e1e5b..5ce2a7da8b11 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -68,8 +68,17 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, int btrfs_init_fs_root(struct btrfs_root *root); int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); -struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, - struct btrfs_key *location); + +struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *key, + bool check_ref); +static inline struct btrfs_root * +btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, + struct btrfs_key *location) +{ + return btrfs_get_fs_root(fs_info, location, true); +} + int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); void btrfs_btree_balance_dirty(struct btrfs_root *root); void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c09a40db53db..51731b76900d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -145,8 +145,16 @@ int __init extent_io_init(void) offsetof(struct btrfs_io_bio, bio)); if (!btrfs_bioset) goto free_buffer_cache; + + if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE)) + goto free_bioset; + return 0; +free_bioset: + bioset_free(btrfs_bioset); + btrfs_bioset = NULL; + free_buffer_cache: kmem_cache_destroy(extent_buffer_cache); extent_buffer_cache = NULL; @@ -1482,10 +1490,8 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree, cur_start = state->end + 1; node = rb_next(node); total_bytes += state->end - state->start + 1; - if (total_bytes >= max_bytes) { - *end = *start + max_bytes - 1; + if (total_bytes >= max_bytes) break; - } if (!node) break; } @@ -1614,7 +1620,7 @@ again: *start = delalloc_start; *end = delalloc_end; free_extent_state(cached_state); - return found; + return 0; } /* @@ -1627,10 +1633,9 @@ again: /* * make sure to limit the number of pages we try to lock down - * if we're looping. */ - if (delalloc_end + 1 - delalloc_start > max_bytes && loops) - delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1; + if (delalloc_end + 1 - delalloc_start > max_bytes) + delalloc_end = delalloc_start + max_bytes - 1; /* step two, lock all the pages after the page that has start */ ret = lock_delalloc_pages(inode, locked_page, @@ -1641,8 +1646,7 @@ again: */ free_extent_state(cached_state); if (!loops) { - unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); - max_bytes = PAGE_CACHE_SIZE - offset; + max_bytes = PAGE_CACHE_SIZE; loops = 1; goto again; } else { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 22ebc13b6c99..b0ef7b07b1b3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7986,7 +7986,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, /* check for collisions, even if the name isn't there */ - ret = btrfs_check_dir_item_collision(root, new_dir->i_ino, + ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino, new_dentry->d_name.name, new_dentry->d_name.len); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index a5a26320503f..4a355726151e 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -588,7 +588,7 @@ static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info, else key.offset = (u64)-1; - return btrfs_read_fs_root_no_name(fs_info, &key); + return btrfs_get_fs_root(fs_info, &key, false); } #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 0b1f4ef8db98..ec71ea44d2b4 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -299,11 +299,6 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) continue; } - if (btrfs_root_refs(&root->root_item) == 0) { - btrfs_add_dead_root(root); - continue; - } - err = btrfs_init_fs_root(root); if (err) { btrfs_free_fs_root(root); @@ -318,6 +313,9 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) btrfs_free_fs_root(root); break; } + + if (btrfs_root_refs(&root->root_item) == 0) + btrfs_add_dead_root(root); } btrfs_free_path(path); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e7a95356df83..8c81bdc1ef9b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1838,11 +1838,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, assert_qgroups_uptodate(trans); update_super_roots(root); - if (!root->fs_info->log_root_recovering) { - btrfs_set_super_log_root(root->fs_info->super_copy, 0); - btrfs_set_super_log_root_level(root->fs_info->super_copy, 0); - } - + btrfs_set_super_log_root(root->fs_info->super_copy, 0); + btrfs_set_super_log_root_level(root->fs_info->super_copy, 0); memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy, sizeof(*root->fs_info->super_copy)); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a10645830223..043b215769c2 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1716,6 +1716,7 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, struct btrfs_device *srcdev) { WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex)); + list_del_rcu(&srcdev->dev_list); list_del_rcu(&srcdev->dev_alloc_list); fs_info->fs_devices->num_devices--; @@ -1725,9 +1726,13 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, } if (srcdev->can_discard) fs_info->fs_devices->num_can_discard--; - if (srcdev->bdev) + if (srcdev->bdev) { fs_info->fs_devices->open_devices--; + /* zero out the old super */ + btrfs_scratch_superblock(srcdev); + } + call_rcu(&srcdev->rcu, free_device); } diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index ea723a5e8226..6d0b07217ac9 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -132,5 +132,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.01" +#define CIFS_VERSION "2.02" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index cfa14c80ef3b..52b6f6c26bfc 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -547,9 +547,6 @@ struct TCP_Server_Info { unsigned int max_rw; /* maxRw specifies the maximum */ /* message size the server can send or receive for */ /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */ - unsigned int max_vcs; /* maximum number of smb sessions, at least - those that can be specified uniquely with - vcnumbers */ unsigned int capabilities; /* selective disabling of caps by smb sess */ int timeAdj; /* Adjust for difference in server time zone in sec */ __u64 CurrentMid; /* multiplex id - rotating counter */ @@ -715,7 +712,6 @@ struct cifs_ses { enum statusEnum status; unsigned overrideSecFlg; /* if non-zero override global sec flags */ __u16 ipc_tid; /* special tid for connection to IPC share */ - __u16 vcnum; char *serverOS; /* name of operating system underlying server */ char *serverNOS; /* name of network operating system of server */ char *serverDomain; /* security realm of server */ @@ -1272,6 +1268,7 @@ struct dfs_info3_param { #define CIFS_FATTR_DELETE_PENDING 0x2 #define CIFS_FATTR_NEED_REVAL 0x4 #define CIFS_FATTR_INO_COLLISION 0x8 +#define CIFS_FATTR_UNKNOWN_NLINK 0x10 struct cifs_fattr { u32 cf_flags; diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 948676db8e2e..a630475e421c 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -2652,26 +2652,7 @@ typedef struct file_xattr_info { } __attribute__((packed)) FILE_XATTR_INFO; /* extended attribute info level 0x205 */ - -/* flags for chattr command */ -#define EXT_SECURE_DELETE 0x00000001 /* EXT3_SECRM_FL */ -#define EXT_ENABLE_UNDELETE 0x00000002 /* EXT3_UNRM_FL */ -/* Reserved for compress file 0x4 */ -#define EXT_SYNCHRONOUS 0x00000008 /* EXT3_SYNC_FL */ -#define EXT_IMMUTABLE_FL 0x00000010 /* EXT3_IMMUTABLE_FL */ -#define EXT_OPEN_APPEND_ONLY 0x00000020 /* EXT3_APPEND_FL */ -#define EXT_DO_NOT_BACKUP 0x00000040 /* EXT3_NODUMP_FL */ -#define EXT_NO_UPDATE_ATIME 0x00000080 /* EXT3_NOATIME_FL */ -/* 0x100 through 0x800 reserved for compression flags and are GET-ONLY */ -#define EXT_HASH_TREE_INDEXED_DIR 0x00001000 /* GET-ONLY EXT3_INDEX_FL */ -/* 0x2000 reserved for IMAGIC_FL */ -#define EXT_JOURNAL_THIS_FILE 0x00004000 /* GET-ONLY EXT3_JOURNAL_DATA_FL */ -/* 0x8000 reserved for EXT3_NOTAIL_FL */ -#define EXT_SYNCHRONOUS_DIR 0x00010000 /* EXT3_DIRSYNC_FL */ -#define EXT_TOPDIR 0x00020000 /* EXT3_TOPDIR_FL */ - -#define EXT_SET_MASK 0x000300FF -#define EXT_GET_MASK 0x0003DFFF +/* flags for lsattr and chflags commands removed arein uapi/linux/fs.h */ typedef struct file_chattr_info { __le64 mask; /* list of all possible attribute bits */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index a3d74fea1623..4baf35949b51 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -463,7 +463,6 @@ decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr) cifs_max_pending); set_credits(server, server->maxReq); server->maxBuf = le16_to_cpu(rsp->MaxBufSize); - server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs); /* even though we do not use raw we might as well set this accurately, in case we ever find a need for it */ if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) { diff --git a/fs/cifs/file.c b/fs/cifs/file.c index eb955b525e55..7ddddf2e2504 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3254,6 +3254,9 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, /* * Reads as many pages as possible from fscache. Returns -ENOBUFS * immediately if the cookie is negative + * + * After this point, every page in the list might have PG_fscache set, + * so we will need to clean that up off of every page we don't use. */ rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list, &num_pages); @@ -3376,6 +3379,11 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, kref_put(&rdata->refcount, cifs_readdata_release); } + /* Any pages that have been shown to fscache but didn't get added to + * the pagecache must be uncached before they get returned to the + * allocator. + */ + cifs_fscache_readpages_cancel(mapping->host, page_list); return rc; } diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index 2f4bc5a58054..b3258f35e88a 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c @@ -223,6 +223,13 @@ void __cifs_readpage_to_fscache(struct inode *inode, struct page *page) fscache_uncache_page(CIFS_I(inode)->fscache, page); } +void __cifs_fscache_readpages_cancel(struct inode *inode, struct list_head *pages) +{ + cifs_dbg(FYI, "%s: (fsc: %p, i: %p)\n", + __func__, CIFS_I(inode)->fscache, inode); + fscache_readpages_cancel(CIFS_I(inode)->fscache, pages); +} + void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode) { struct cifsInodeInfo *cifsi = CIFS_I(inode); diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h index 63539323e0b9..24794b6cd8ec 100644 --- a/fs/cifs/fscache.h +++ b/fs/cifs/fscache.h @@ -54,6 +54,7 @@ extern int __cifs_readpages_from_fscache(struct inode *, struct address_space *, struct list_head *, unsigned *); +extern void __cifs_fscache_readpages_cancel(struct inode *, struct list_head *); extern void __cifs_readpage_to_fscache(struct inode *, struct page *); @@ -91,6 +92,13 @@ static inline void cifs_readpage_to_fscache(struct inode *inode, __cifs_readpage_to_fscache(inode, page); } +static inline void cifs_fscache_readpages_cancel(struct inode *inode, + struct list_head *pages) +{ + if (CIFS_I(inode)->fscache) + return __cifs_fscache_readpages_cancel(inode, pages); +} + #else /* CONFIG_CIFS_FSCACHE */ static inline int cifs_fscache_register(void) { return 0; } static inline void cifs_fscache_unregister(void) {} @@ -131,6 +139,11 @@ static inline int cifs_readpages_from_fscache(struct inode *inode, static inline void cifs_readpage_to_fscache(struct inode *inode, struct page *page) {} +static inline void cifs_fscache_readpages_cancel(struct inode *inode, + struct list_head *pages) +{ +} + #endif /* CONFIG_CIFS_FSCACHE */ #endif /* _CIFS_FSCACHE_H */ diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index f9ff9c173f78..867b7cdc794a 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -120,6 +120,33 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr) cifs_i->invalid_mapping = true; } +/* + * copy nlink to the inode, unless it wasn't provided. Provide + * sane values if we don't have an existing one and none was provided + */ +static void +cifs_nlink_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) +{ + /* + * if we're in a situation where we can't trust what we + * got from the server (readdir, some non-unix cases) + * fake reasonable values + */ + if (fattr->cf_flags & CIFS_FATTR_UNKNOWN_NLINK) { + /* only provide fake values on a new inode */ + if (inode->i_state & I_NEW) { + if (fattr->cf_cifsattrs & ATTR_DIRECTORY) + set_nlink(inode, 2); + else + set_nlink(inode, 1); + } + return; + } + + /* we trust the server, so update it */ + set_nlink(inode, fattr->cf_nlink); +} + /* populate an inode with info from a cifs_fattr struct */ void cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) @@ -134,7 +161,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) inode->i_mtime = fattr->cf_mtime; inode->i_ctime = fattr->cf_ctime; inode->i_rdev = fattr->cf_rdev; - set_nlink(inode, fattr->cf_nlink); + cifs_nlink_fattr_to_inode(inode, fattr); inode->i_uid = fattr->cf_uid; inode->i_gid = fattr->cf_gid; @@ -541,6 +568,7 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, fattr->cf_bytes = le64_to_cpu(info->AllocationSize); fattr->cf_createtime = le64_to_cpu(info->CreationTime); + fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; fattr->cf_dtype = DT_DIR; @@ -548,7 +576,8 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, * Server can return wrong NumberOfLinks value for directories * when Unix extensions are disabled - fake it. */ - fattr->cf_nlink = 2; + if (!tcon->unix_ext) + fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK; } else if (fattr->cf_cifsattrs & ATTR_REPARSE) { fattr->cf_mode = S_IFLNK; fattr->cf_dtype = DT_LNK; @@ -561,11 +590,15 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, if (fattr->cf_cifsattrs & ATTR_READONLY) fattr->cf_mode &= ~(S_IWUGO); - fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); - if (fattr->cf_nlink < 1) { - cifs_dbg(1, "replacing bogus file nlink value %u\n", + /* + * Don't accept zero nlink from non-unix servers unless + * delete is pending. Instead mark it as unknown. + */ + if ((fattr->cf_nlink < 1) && !tcon->unix_ext && + !info->DeletePending) { + cifs_dbg(1, "bogus file nlink value %u\n", fattr->cf_nlink); - fattr->cf_nlink = 1; + fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK; } } diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 42ef03be089f..53a75f3d0179 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -180,6 +180,9 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) fattr->cf_dtype = DT_REG; } + /* non-unix readdir doesn't provide nlink */ + fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK; + if (fattr->cf_cifsattrs & ATTR_READONLY) fattr->cf_mode &= ~S_IWUGO; diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 5f99b7f19e78..352358de1d7e 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -32,88 +32,6 @@ #include <linux/slab.h> #include "cifs_spnego.h" -/* - * Checks if this is the first smb session to be reconnected after - * the socket has been reestablished (so we know whether to use vc 0). - * Called while holding the cifs_tcp_ses_lock, so do not block - */ -static bool is_first_ses_reconnect(struct cifs_ses *ses) -{ - struct list_head *tmp; - struct cifs_ses *tmp_ses; - - list_for_each(tmp, &ses->server->smb_ses_list) { - tmp_ses = list_entry(tmp, struct cifs_ses, - smb_ses_list); - if (tmp_ses->need_reconnect == false) - return false; - } - /* could not find a session that was already connected, - this must be the first one we are reconnecting */ - return true; -} - -/* - * vc number 0 is treated specially by some servers, and should be the - * first one we request. After that we can use vcnumbers up to maxvcs, - * one for each smb session (some Windows versions set maxvcs incorrectly - * so maxvc=1 can be ignored). If we have too many vcs, we can reuse - * any vc but zero (some servers reset the connection on vcnum zero) - * - */ -static __le16 get_next_vcnum(struct cifs_ses *ses) -{ - __u16 vcnum = 0; - struct list_head *tmp; - struct cifs_ses *tmp_ses; - __u16 max_vcs = ses->server->max_vcs; - __u16 i; - int free_vc_found = 0; - - /* Quoting the MS-SMB specification: "Windows-based SMB servers set this - field to one but do not enforce this limit, which allows an SMB client - to establish more virtual circuits than allowed by this value ... but - other server implementations can enforce this limit." */ - if (max_vcs < 2) - max_vcs = 0xFFFF; - - spin_lock(&cifs_tcp_ses_lock); - if ((ses->need_reconnect) && is_first_ses_reconnect(ses)) - goto get_vc_num_exit; /* vcnum will be zero */ - for (i = ses->server->srv_count - 1; i < max_vcs; i++) { - if (i == 0) /* this is the only connection, use vc 0 */ - break; - - free_vc_found = 1; - - list_for_each(tmp, &ses->server->smb_ses_list) { - tmp_ses = list_entry(tmp, struct cifs_ses, - smb_ses_list); - if (tmp_ses->vcnum == i) { - free_vc_found = 0; - break; /* found duplicate, try next vcnum */ - } - } - if (free_vc_found) - break; /* we found a vcnumber that will work - use it */ - } - - if (i == 0) - vcnum = 0; /* for most common case, ie if one smb session, use - vc zero. Also for case when no free vcnum, zero - is safest to send (some clients only send zero) */ - else if (free_vc_found == 0) - vcnum = 1; /* we can not reuse vc=0 safely, since some servers - reset all uids on that, but 1 is ok. */ - else - vcnum = i; - ses->vcnum = vcnum; -get_vc_num_exit: - spin_unlock(&cifs_tcp_ses_lock); - - return cpu_to_le16(vcnum); -} - static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB) { __u32 capabilities = 0; @@ -128,7 +46,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4, USHRT_MAX)); pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq); - pSMB->req.VcNumber = get_next_vcnum(ses); + pSMB->req.VcNumber = __constant_cpu_to_le16(1); /* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0d424d7ac02b..e274e9c1171f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2563,7 +2563,7 @@ retry: break; } blk_finish_plug(&plug); - if (!ret && !cycled) { + if (!ret && !cycled && wbc->nr_to_write > 0) { cycled = 1; mpd.last_page = writeback_index - 1; mpd.first_page = 0; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index c081e34f717f..03e9bebba198 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1350,6 +1350,8 @@ retry: s_min_extra_isize) { tried_min_extra_isize++; new_extra_isize = s_min_extra_isize; + kfree(is); is = NULL; + kfree(bs); bs = NULL; goto retry; } error = -1; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 62b43b577bfc..b7989f2ab4c4 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -182,6 +182,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) struct inode *inode; struct dentry *parent; struct fuse_conn *fc; + struct fuse_inode *fi; int ret; inode = ACCESS_ONCE(entry->d_inode); @@ -228,7 +229,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) if (!err && !outarg.nodeid) err = -ENOENT; if (!err) { - struct fuse_inode *fi = get_fuse_inode(inode); + fi = get_fuse_inode(inode); if (outarg.nodeid != get_node_id(inode)) { fuse_queue_forget(fc, forget, outarg.nodeid, 1); goto invalid; @@ -246,8 +247,11 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) attr_version); fuse_change_entry_timeout(entry, &outarg); } else if (inode) { - fc = get_fuse_conn(inode); - if (fc->readdirplus_auto) { + fi = get_fuse_inode(inode); + if (flags & LOOKUP_RCU) { + if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state)) + return -ECHILD; + } else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) { parent = dget_parent(entry); fuse_advise_use_readdirplus(parent->d_inode); dput(parent); @@ -259,7 +263,8 @@ out: invalid: ret = 0; - if (check_submounts_and_drop(entry) != 0) + + if (!(flags & LOOKUP_RCU) && check_submounts_and_drop(entry) != 0) ret = 1; goto out; } @@ -1063,6 +1068,8 @@ static int fuse_access(struct inode *inode, int mask) struct fuse_access_in inarg; int err; + BUG_ON(mask & MAY_NOT_BLOCK); + if (fc->no_access) return 0; @@ -1150,9 +1157,6 @@ static int fuse_permission(struct inode *inode, int mask) noticed immediately, only after the attribute timeout has expired */ } else if (mask & (MAY_ACCESS | MAY_CHDIR)) { - if (mask & MAY_NOT_BLOCK) - return -ECHILD; - err = fuse_access(inode, mask); } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { if (!(inode->i_mode & S_IXUGO)) { @@ -1291,6 +1295,8 @@ static int fuse_direntplus_link(struct file *file, } found: + if (fc->readdirplus_auto) + set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state); fuse_change_entry_timeout(dentry, o); err = 0; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index d409deafc67b..4598345ab87d 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2467,6 +2467,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, { struct fuse_file *ff = file->private_data; struct inode *inode = file->f_inode; + struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_conn *fc = ff->fc; struct fuse_req *req; struct fuse_fallocate_in inarg = { @@ -2484,10 +2485,20 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, if (lock_inode) { mutex_lock(&inode->i_mutex); - if (mode & FALLOC_FL_PUNCH_HOLE) - fuse_set_nowrite(inode); + if (mode & FALLOC_FL_PUNCH_HOLE) { + loff_t endbyte = offset + length - 1; + err = filemap_write_and_wait_range(inode->i_mapping, + offset, endbyte); + if (err) + goto out; + + fuse_sync_writes(inode); + } } + if (!(mode & FALLOC_FL_KEEP_SIZE)) + set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -2520,11 +2531,11 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, fuse_invalidate_attr(inode); out: - if (lock_inode) { - if (mode & FALLOC_FL_PUNCH_HOLE) - fuse_release_nowrite(inode); + if (!(mode & FALLOC_FL_KEEP_SIZE)) + clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); + + if (lock_inode) mutex_unlock(&inode->i_mutex); - } return err; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 5ced199b50bb..5b9e6f3b6aef 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -115,6 +115,8 @@ struct fuse_inode { enum { /** Advise readdirplus */ FUSE_I_ADVISE_RDPLUS, + /** Initialized with readdirplus */ + FUSE_I_INIT_RDPLUS, /** An operation changing file size is in progress */ FUSE_I_SIZE_UNSTABLE, }; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 854a8f05a610..02b0df769e2d 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1458,7 +1458,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, trace_nfs_atomic_open_enter(dir, ctx, open_flags); nfs_block_sillyrename(dentry->d_parent); - inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr); + inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, opened); nfs_unblock_sillyrename(dentry->d_parent); if (IS_ERR(inode)) { err = PTR_ERR(inode); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index e5b804dd944c..77efaf15ec90 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -19,6 +19,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) struct inode *dir; unsigned openflags = filp->f_flags; struct iattr attr; + int opened = 0; int err; /* @@ -55,7 +56,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) nfs_wb_all(inode); } - inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr); + inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr, &opened); if (IS_ERR(inode)) { err = PTR_ERR(inode); switch (err) { diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 95604f64cab8..c7c295e556ed 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -185,6 +185,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) if (status) goto out_put; + smp_wmb(); ds->ds_clp = clp; dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); out: @@ -801,34 +802,35 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr; struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); - - if (filelayout_test_devid_unavailable(devid)) - return NULL; + struct nfs4_pnfs_ds *ret = ds; if (ds == NULL) { printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", __func__, ds_idx); filelayout_mark_devid_invalid(devid); - return NULL; + goto out; } + smp_rmb(); if (ds->ds_clp) - return ds; + goto out_test_devid; if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); int err; err = nfs4_ds_connect(s, ds); - if (err) { + if (err) nfs4_mark_deviceid_unavailable(devid); - ds = NULL; - } nfs4_clear_ds_conn_bit(ds); } else { /* Either ds is connected, or ds is NULL */ nfs4_wait_ds_connect(ds); } - return ds; +out_test_devid: + if (filelayout_test_devid_unavailable(devid)) + ret = NULL; +out: + return ret; } module_param(dataserver_retrans, uint, 0644); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 989bb9d3074d..d53d6785cba2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -912,6 +912,7 @@ struct nfs4_opendata { struct iattr attrs; unsigned long timestamp; unsigned int rpc_done : 1; + unsigned int file_created : 1; unsigned int is_recover : 1; int rpc_status; int cancelled; @@ -1946,8 +1947,13 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) nfs_fattr_map_and_free_names(server, &data->f_attr); - if (o_arg->open_flags & O_CREAT) + if (o_arg->open_flags & O_CREAT) { update_changeattr(dir, &o_res->cinfo); + if (o_arg->open_flags & O_EXCL) + data->file_created = 1; + else if (o_res->cinfo.before != o_res->cinfo.after) + data->file_created = 1; + } if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0) server->caps &= ~NFS_CAP_POSIX_LOCK; if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { @@ -2191,7 +2197,8 @@ static int _nfs4_do_open(struct inode *dir, struct nfs_open_context *ctx, int flags, struct iattr *sattr, - struct nfs4_label *label) + struct nfs4_label *label, + int *opened) { struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; @@ -2261,6 +2268,8 @@ static int _nfs4_do_open(struct inode *dir, nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel); } } + if (opendata->file_created) + *opened |= FILE_CREATED; if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) *ctx_th = opendata->f_attr.mdsthreshold; @@ -2289,7 +2298,8 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct nfs_open_context *ctx, int flags, struct iattr *sattr, - struct nfs4_label *label) + struct nfs4_label *label, + int *opened) { struct nfs_server *server = NFS_SERVER(dir); struct nfs4_exception exception = { }; @@ -2297,7 +2307,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, int status; do { - status = _nfs4_do_open(dir, ctx, flags, sattr, label); + status = _nfs4_do_open(dir, ctx, flags, sattr, label, opened); res = ctx->state; trace_nfs4_open_file(ctx, flags, status); if (status == 0) @@ -2659,7 +2669,8 @@ out: } static struct inode * -nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags, struct iattr *attr) +nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, + int open_flags, struct iattr *attr, int *opened) { struct nfs4_state *state; struct nfs4_label l = {0, 0, 0, NULL}, *label = NULL; @@ -2667,7 +2678,7 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags label = nfs4_label_init_security(dir, ctx->dentry, attr, &l); /* Protect against concurrent sillydeletes */ - state = nfs4_do_open(dir, ctx, open_flags, attr, label); + state = nfs4_do_open(dir, ctx, open_flags, attr, label, opened); nfs4_label_release_security(label); @@ -3332,6 +3343,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, struct nfs4_label l, *ilabel = NULL; struct nfs_open_context *ctx; struct nfs4_state *state; + int opened = 0; int status = 0; ctx = alloc_nfs_open_context(dentry, FMODE_READ); @@ -3341,7 +3353,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, ilabel = nfs4_label_init_security(dir, dentry, sattr, &l); sattr->ia_mode &= ~current_umask(); - state = nfs4_do_open(dir, ctx, flags, sattr, ilabel); + state = nfs4_do_open(dir, ctx, flags, sattr, ilabel, &opened); if (IS_ERR(state)) { status = PTR_ERR(state); goto out; @@ -7564,8 +7576,10 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, { int err; struct page *page; - rpc_authflavor_t flavor; + rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR; struct nfs4_secinfo_flavors *flavors; + struct nfs4_secinfo4 *secinfo; + int i; page = alloc_page(GFP_KERNEL); if (!page) { @@ -7587,9 +7601,31 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, if (err) goto out_freepage; - flavor = nfs_find_best_sec(flavors); - if (err == 0) - err = nfs4_lookup_root_sec(server, fhandle, info, flavor); + for (i = 0; i < flavors->num_flavors; i++) { + secinfo = &flavors->flavors[i]; + + switch (secinfo->flavor) { + case RPC_AUTH_NULL: + case RPC_AUTH_UNIX: + case RPC_AUTH_GSS: + flavor = rpcauth_get_pseudoflavor(secinfo->flavor, + &secinfo->flavor_info); + break; + default: + flavor = RPC_AUTH_MAXFLAVOR; + break; + } + + if (flavor != RPC_AUTH_MAXFLAVOR) { + err = nfs4_lookup_root_sec(server, fhandle, + info, flavor); + if (!err) + break; + } + } + + if (flavor == RPC_AUTH_MAXFLAVOR) + err = -EPERM; out_freepage: put_page(page); diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 0ba679866e50..da276640f776 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -94,6 +94,7 @@ void nilfs_forget_buffer(struct buffer_head *bh) clear_buffer_nilfs_volatile(bh); clear_buffer_nilfs_checked(bh); clear_buffer_nilfs_redirected(bh); + clear_buffer_async_write(bh); clear_buffer_dirty(bh); if (nilfs_page_buffers_clean(page)) __nilfs_clear_page_dirty(page); @@ -429,6 +430,7 @@ void nilfs_clear_dirty_page(struct page *page, bool silent) "discard block %llu, size %zu", (u64)bh->b_blocknr, bh->b_size); } + clear_buffer_async_write(bh); clear_buffer_dirty(bh); clear_buffer_nilfs_volatile(bh); clear_buffer_nilfs_checked(bh); diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index bd88a7461063..9f6b486b6c01 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -665,7 +665,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, bh = head = page_buffers(page); do { - if (!buffer_dirty(bh)) + if (!buffer_dirty(bh) || buffer_async_write(bh)) continue; get_bh(bh); list_add_tail(&bh->b_assoc_buffers, listp); @@ -699,7 +699,8 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode, for (i = 0; i < pagevec_count(&pvec); i++) { bh = head = page_buffers(pvec.pages[i]); do { - if (buffer_dirty(bh)) { + if (buffer_dirty(bh) && + !buffer_async_write(bh)) { get_bh(bh); list_add_tail(&bh->b_assoc_buffers, listp); @@ -1579,6 +1580,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { + set_buffer_async_write(bh); if (bh->b_page != bd_page) { if (bd_page) { lock_page(bd_page); @@ -1592,6 +1594,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { + set_buffer_async_write(bh); if (bh == segbuf->sb_super_root) { if (bh->b_page != bd_page) { lock_page(bd_page); @@ -1677,6 +1680,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err) list_for_each_entry(segbuf, logs, sb_list) { list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { + clear_buffer_async_write(bh); if (bh->b_page != bd_page) { if (bd_page) end_page_writeback(bd_page); @@ -1686,6 +1690,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err) list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { + clear_buffer_async_write(bh); if (bh == segbuf->sb_super_root) { if (bh->b_page != bd_page) { end_page_writeback(bd_page); @@ -1755,6 +1760,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) b_assoc_buffers) { set_buffer_uptodate(bh); clear_buffer_dirty(bh); + clear_buffer_async_write(bh); if (bh->b_page != bd_page) { if (bd_page) end_page_writeback(bd_page); @@ -1776,6 +1782,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) b_assoc_buffers) { set_buffer_uptodate(bh); clear_buffer_dirty(bh); + clear_buffer_async_write(bh); clear_buffer_delay(bh); clear_buffer_nilfs_volatile(bh); clear_buffer_nilfs_redirected(bh); diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index ef999729e274..0d3a97d2d5f6 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -70,9 +70,10 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags) */ if (inode == NULL) { unsigned long gen = (unsigned long) dentry->d_fsdata; - unsigned long pgen = - OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen; - + unsigned long pgen; + spin_lock(&dentry->d_lock); + pgen = OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen; + spin_unlock(&dentry->d_lock); trace_ocfs2_dentry_revalidate_negative(dentry->d_name.len, dentry->d_name.name, pgen, gen); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 121da2dc3be8..d4e81e4a9b04 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1924,7 +1924,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) { int tmp, hangup_needed = 0; struct ocfs2_super *osb = NULL; - char nodestr[8]; + char nodestr[12]; trace_ocfs2_dismount_volume(sb); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 73feacc49b2e..fd777032c2ba 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1163,21 +1163,6 @@ static struct reiserfs_journal_list *find_newer_jl_for_cn(struct return NULL; } -static int newer_jl_done(struct reiserfs_journal_cnode *cn) -{ - struct super_block *sb = cn->sb; - b_blocknr_t blocknr = cn->blocknr; - - cn = cn->hprev; - while (cn) { - if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist && - atomic_read(&cn->jlist->j_commit_left) != 0) - return 0; - cn = cn->hprev; - } - return 1; -} - static void remove_journal_hash(struct super_block *, struct reiserfs_journal_cnode **, struct reiserfs_journal_list *, unsigned long, @@ -1353,7 +1338,6 @@ static int flush_journal_list(struct super_block *s, reiserfs_warning(s, "clm-2048", "called with wcount %d", atomic_read(&journal->j_wcount)); } - BUG_ON(jl->j_trans_id == 0); /* if flushall == 0, the lock is already held */ if (flushall) { @@ -1593,31 +1577,6 @@ static int flush_journal_list(struct super_block *s, return err; } -static int test_transaction(struct super_block *s, - struct reiserfs_journal_list *jl) -{ - struct reiserfs_journal_cnode *cn; - - if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) - return 1; - - cn = jl->j_realblock; - while (cn) { - /* if the blocknr == 0, this has been cleared from the hash, - ** skip it - */ - if (cn->blocknr == 0) { - goto next; - } - if (cn->bh && !newer_jl_done(cn)) - return 0; - next: - cn = cn->next; - cond_resched(); - } - return 0; -} - static int write_one_transaction(struct super_block *s, struct reiserfs_journal_list *jl, struct buffer_chunk *chunk) @@ -1805,6 +1764,8 @@ static int flush_used_journal_lists(struct super_block *s, break; tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next); } + get_journal_list(jl); + get_journal_list(flush_jl); /* try to find a group of blocks we can flush across all the ** transactions, but only bother if we've actually spanned ** across multiple lists @@ -1813,6 +1774,8 @@ static int flush_used_journal_lists(struct super_block *s, ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i); } flush_journal_list(s, flush_jl, 1); + put_journal_list(s, flush_jl); + put_journal_list(s, jl); return 0; } @@ -3868,27 +3831,6 @@ int reiserfs_prepare_for_journal(struct super_block *sb, return 1; } -static void flush_old_journal_lists(struct super_block *s) -{ - struct reiserfs_journal *journal = SB_JOURNAL(s); - struct reiserfs_journal_list *jl; - struct list_head *entry; - time_t now = get_seconds(); - - while (!list_empty(&journal->j_journal_list)) { - entry = journal->j_journal_list.next; - jl = JOURNAL_LIST_ENTRY(entry); - /* this check should always be run, to send old lists to disk */ - if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) && - atomic_read(&jl->j_commit_left) == 0 && - test_transaction(s, jl)) { - flush_used_journal_lists(s, jl); - } else { - break; - } - } -} - /* ** long and ugly. If flush, will not return until all commit ** blocks and all real buffers in the trans are on disk. @@ -4232,7 +4174,6 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, } } } - flush_old_journal_lists(sb); journal->j_current_jl->j_list_bitmap = get_list_bitmap(sb, journal->j_current_jl); diff --git a/fs/statfs.c b/fs/statfs.c index c219e733f553..083dc0ac9140 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -94,7 +94,7 @@ retry: int fd_statfs(int fd, struct kstatfs *st) { - struct fd f = fdget(fd); + struct fd f = fdget_raw(fd); int error = -EBADF; if (f.file) { error = vfs_statfs(&f.file->f_path, st); diff --git a/fs/super.c b/fs/super.c index 3a96c9783a8b..0225c20f8770 100644 --- a/fs/super.c +++ b/fs/super.c @@ -264,6 +264,8 @@ out_free_sb: */ static inline void destroy_super(struct super_block *s) { + list_lru_destroy(&s->s_dentry_lru); + list_lru_destroy(&s->s_inode_lru); #ifdef CONFIG_SMP free_percpu(s->s_files); #endif @@ -323,8 +325,6 @@ void deactivate_locked_super(struct super_block *s) /* caches are now gone, we can safely kill the shrinker now */ unregister_shrinker(&s->s_shrink); - list_lru_destroy(&s->s_dentry_lru); - list_lru_destroy(&s->s_inode_lru); put_filesystem(fs); put_super(s); diff --git a/fs/sysv/super.c b/fs/sysv/super.c index d0c6a007ce83..eda10959714f 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -487,6 +487,7 @@ static int v7_fill_super(struct super_block *sb, void *data, int silent) sbi->s_sb = sb; sbi->s_block_base = 0; sbi->s_type = FSTYPE_V7; + mutex_init(&sbi->s_lock); sb->s_fs_info = sbi; sb_set_blocksize(sb, 512); diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 7e5aae4bf46f..6eaf5edf1ea1 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -30,18 +30,17 @@ void udf_free_inode(struct inode *inode) { struct super_block *sb = inode->i_sb; struct udf_sb_info *sbi = UDF_SB(sb); + struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sb); - mutex_lock(&sbi->s_alloc_mutex); - if (sbi->s_lvid_bh) { - struct logicalVolIntegrityDescImpUse *lvidiu = - udf_sb_lvidiu(sbi); + if (lvidiu) { + mutex_lock(&sbi->s_alloc_mutex); if (S_ISDIR(inode->i_mode)) le32_add_cpu(&lvidiu->numDirs, -1); else le32_add_cpu(&lvidiu->numFiles, -1); udf_updated_lvid(sb); + mutex_unlock(&sbi->s_alloc_mutex); } - mutex_unlock(&sbi->s_alloc_mutex); udf_free_blocks(sb, NULL, &UDF_I(inode)->i_location, 0, 1); } @@ -55,6 +54,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err) uint32_t start = UDF_I(dir)->i_location.logicalBlockNum; struct udf_inode_info *iinfo; struct udf_inode_info *dinfo = UDF_I(dir); + struct logicalVolIntegrityDescImpUse *lvidiu; inode = new_inode(sb); @@ -92,12 +92,10 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err) return NULL; } - if (sbi->s_lvid_bh) { - struct logicalVolIntegrityDescImpUse *lvidiu; - + lvidiu = udf_sb_lvidiu(sb); + if (lvidiu) { iinfo->i_unique = lvid_get_unique_id(sb); mutex_lock(&sbi->s_alloc_mutex); - lvidiu = udf_sb_lvidiu(sbi); if (S_ISDIR(mode)) le32_add_cpu(&lvidiu->numDirs, 1); else diff --git a/fs/udf/super.c b/fs/udf/super.c index 839a2bad7f45..91219385691d 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -94,13 +94,25 @@ static unsigned int udf_count_free(struct super_block *); static int udf_statfs(struct dentry *, struct kstatfs *); static int udf_show_options(struct seq_file *, struct dentry *); -struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi) +struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct super_block *sb) { - struct logicalVolIntegrityDesc *lvid = - (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data; - __u32 number_of_partitions = le32_to_cpu(lvid->numOfPartitions); - __u32 offset = number_of_partitions * 2 * - sizeof(uint32_t)/sizeof(uint8_t); + struct logicalVolIntegrityDesc *lvid; + unsigned int partnum; + unsigned int offset; + + if (!UDF_SB(sb)->s_lvid_bh) + return NULL; + lvid = (struct logicalVolIntegrityDesc *)UDF_SB(sb)->s_lvid_bh->b_data; + partnum = le32_to_cpu(lvid->numOfPartitions); + if ((sb->s_blocksize - sizeof(struct logicalVolIntegrityDescImpUse) - + offsetof(struct logicalVolIntegrityDesc, impUse)) / + (2 * sizeof(uint32_t)) < partnum) { + udf_err(sb, "Logical volume integrity descriptor corrupted " + "(numOfPartitions = %u)!\n", partnum); + return NULL; + } + /* The offset is to skip freeSpaceTable and sizeTable arrays */ + offset = partnum * 2 * sizeof(uint32_t); return (struct logicalVolIntegrityDescImpUse *)&(lvid->impUse[offset]); } @@ -629,9 +641,10 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) struct udf_options uopt; struct udf_sb_info *sbi = UDF_SB(sb); int error = 0; + struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sb); - if (sbi->s_lvid_bh) { - int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev); + if (lvidiu) { + int write_rev = le16_to_cpu(lvidiu->minUDFWriteRev); if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & MS_RDONLY)) return -EACCES; } @@ -1905,11 +1918,12 @@ static void udf_open_lvid(struct super_block *sb) if (!bh) return; - - mutex_lock(&sbi->s_alloc_mutex); lvid = (struct logicalVolIntegrityDesc *)bh->b_data; - lvidiu = udf_sb_lvidiu(sbi); + lvidiu = udf_sb_lvidiu(sb); + if (!lvidiu) + return; + mutex_lock(&sbi->s_alloc_mutex); lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; udf_time_to_disk_stamp(&lvid->recordingDateAndTime, @@ -1937,10 +1951,12 @@ static void udf_close_lvid(struct super_block *sb) if (!bh) return; + lvid = (struct logicalVolIntegrityDesc *)bh->b_data; + lvidiu = udf_sb_lvidiu(sb); + if (!lvidiu) + return; mutex_lock(&sbi->s_alloc_mutex); - lvid = (struct logicalVolIntegrityDesc *)bh->b_data; - lvidiu = udf_sb_lvidiu(sbi); lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; udf_time_to_disk_stamp(&lvid->recordingDateAndTime, CURRENT_TIME); @@ -2093,15 +2109,19 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) if (sbi->s_lvid_bh) { struct logicalVolIntegrityDescImpUse *lvidiu = - udf_sb_lvidiu(sbi); - uint16_t minUDFReadRev = le16_to_cpu(lvidiu->minUDFReadRev); - uint16_t minUDFWriteRev = le16_to_cpu(lvidiu->minUDFWriteRev); - /* uint16_t maxUDFWriteRev = - le16_to_cpu(lvidiu->maxUDFWriteRev); */ + udf_sb_lvidiu(sb); + uint16_t minUDFReadRev; + uint16_t minUDFWriteRev; + if (!lvidiu) { + ret = -EINVAL; + goto error_out; + } + minUDFReadRev = le16_to_cpu(lvidiu->minUDFReadRev); + minUDFWriteRev = le16_to_cpu(lvidiu->minUDFWriteRev); if (minUDFReadRev > UDF_MAX_READ_VERSION) { udf_err(sb, "minUDFReadRev=%x (max is %x)\n", - le16_to_cpu(lvidiu->minUDFReadRev), + minUDFReadRev, UDF_MAX_READ_VERSION); ret = -EINVAL; goto error_out; @@ -2265,11 +2285,7 @@ static int udf_statfs(struct dentry *dentry, struct kstatfs *buf) struct logicalVolIntegrityDescImpUse *lvidiu; u64 id = huge_encode_dev(sb->s_bdev->bd_dev); - if (sbi->s_lvid_bh != NULL) - lvidiu = udf_sb_lvidiu(sbi); - else - lvidiu = NULL; - + lvidiu = udf_sb_lvidiu(sb); buf->f_type = UDF_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = sbi->s_partmaps[sbi->s_partition].s_partition_len; diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index ed401e94aa8c..1f32c7bd9f57 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -162,7 +162,7 @@ static inline struct udf_sb_info *UDF_SB(struct super_block *sb) return sb->s_fs_info; } -struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi); +struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct super_block *sb); int udf_compute_nr_groups(struct super_block *sb, u32 partition); diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 88c5ea75ebf6..f1d85cfc0a54 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -628,6 +628,7 @@ xfs_buf_item_unlock( else if (aborted) { ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp)); if (lip->li_flags & XFS_LI_IN_AIL) { + spin_lock(&lip->li_ailp->xa_lock); xfs_trans_ail_delete(lip->li_ailp, lip, SHUTDOWN_LOG_IO_ERROR); } diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 069537c845e5..20bf8e8002d6 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -1224,6 +1224,7 @@ xfs_da3_node_toosmall( /* start with smaller blk num */ forward = nodehdr.forw < nodehdr.back; for (i = 0; i < 2; forward = !forward, i++) { + struct xfs_da3_icnode_hdr thdr; if (forward) blkno = nodehdr.forw; else @@ -1236,10 +1237,10 @@ xfs_da3_node_toosmall( return(error); node = bp->b_addr; - xfs_da3_node_hdr_from_disk(&nodehdr, node); + xfs_da3_node_hdr_from_disk(&thdr, node); xfs_trans_brelse(state->args->trans, bp); - if (count - nodehdr.count >= 0) + if (count - thdr.count >= 0) break; /* fits with at least 25% to spare */ } if (i >= 2) { diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 0957aa98b6c0..12dad188939d 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -1158,7 +1158,7 @@ xfs_dir2_sf_to_block( /* * Create entry for . */ - dep = xfs_dir3_data_dot_entry_p(hdr); + dep = xfs_dir3_data_dot_entry_p(mp, hdr); dep->inumber = cpu_to_be64(dp->i_ino); dep->namelen = 1; dep->name[0] = '.'; @@ -1172,7 +1172,7 @@ xfs_dir2_sf_to_block( /* * Create entry for .. */ - dep = xfs_dir3_data_dotdot_entry_p(hdr); + dep = xfs_dir3_data_dotdot_entry_p(mp, hdr); dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp)); dep->namelen = 2; dep->name[0] = dep->name[1] = '.'; @@ -1183,7 +1183,7 @@ xfs_dir2_sf_to_block( blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, (char *)dep - (char *)hdr)); - offset = xfs_dir3_data_first_offset(hdr); + offset = xfs_dir3_data_first_offset(mp); /* * Loop over existing entries, stuff them in. */ diff --git a/fs/xfs/xfs_dir2_format.h b/fs/xfs/xfs_dir2_format.h index a0961a61ac1a..9cf67381adf6 100644 --- a/fs/xfs/xfs_dir2_format.h +++ b/fs/xfs/xfs_dir2_format.h @@ -497,69 +497,58 @@ xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr) /* * Offsets of . and .. in data space (always block 0) * - * The macros are used for shortform directories as they have no headers to read - * the magic number out of. Shortform directories need to know the size of the - * data block header because the sfe embeds the block offset of the entry into - * it so that it doesn't change when format conversion occurs. Bad Things Happen - * if we don't follow this rule. - * * XXX: there is scope for significant optimisation of the logic here. Right * now we are checking for "dir3 format" over and over again. Ideally we should * only do it once for each operation. */ -#define XFS_DIR3_DATA_DOT_OFFSET(mp) \ - xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&(mp)->m_sb)) -#define XFS_DIR3_DATA_DOTDOT_OFFSET(mp) \ - (XFS_DIR3_DATA_DOT_OFFSET(mp) + xfs_dir3_data_entsize(mp, 1)) -#define XFS_DIR3_DATA_FIRST_OFFSET(mp) \ - (XFS_DIR3_DATA_DOTDOT_OFFSET(mp) + xfs_dir3_data_entsize(mp, 2)) - static inline xfs_dir2_data_aoff_t -xfs_dir3_data_dot_offset(struct xfs_dir2_data_hdr *hdr) +xfs_dir3_data_dot_offset(struct xfs_mount *mp) { - return xfs_dir3_data_entry_offset(hdr); + return xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&mp->m_sb)); } static inline xfs_dir2_data_aoff_t -xfs_dir3_data_dotdot_offset(struct xfs_dir2_data_hdr *hdr) +xfs_dir3_data_dotdot_offset(struct xfs_mount *mp) { - bool dir3 = hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC); - return xfs_dir3_data_dot_offset(hdr) + - __xfs_dir3_data_entsize(dir3, 1); + return xfs_dir3_data_dot_offset(mp) + + xfs_dir3_data_entsize(mp, 1); } static inline xfs_dir2_data_aoff_t -xfs_dir3_data_first_offset(struct xfs_dir2_data_hdr *hdr) +xfs_dir3_data_first_offset(struct xfs_mount *mp) { - bool dir3 = hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC); - return xfs_dir3_data_dotdot_offset(hdr) + - __xfs_dir3_data_entsize(dir3, 2); + return xfs_dir3_data_dotdot_offset(mp) + + xfs_dir3_data_entsize(mp, 2); } /* * location of . and .. in data space (always block 0) */ static inline struct xfs_dir2_data_entry * -xfs_dir3_data_dot_entry_p(struct xfs_dir2_data_hdr *hdr) +xfs_dir3_data_dot_entry_p( + struct xfs_mount *mp, + struct xfs_dir2_data_hdr *hdr) { return (struct xfs_dir2_data_entry *) - ((char *)hdr + xfs_dir3_data_dot_offset(hdr)); + ((char *)hdr + xfs_dir3_data_dot_offset(mp)); } static inline struct xfs_dir2_data_entry * -xfs_dir3_data_dotdot_entry_p(struct xfs_dir2_data_hdr *hdr) +xfs_dir3_data_dotdot_entry_p( + struct xfs_mount *mp, + struct xfs_dir2_data_hdr *hdr) { return (struct xfs_dir2_data_entry *) - ((char *)hdr + xfs_dir3_data_dotdot_offset(hdr)); + ((char *)hdr + xfs_dir3_data_dotdot_offset(mp)); } static inline struct xfs_dir2_data_entry * -xfs_dir3_data_first_entry_p(struct xfs_dir2_data_hdr *hdr) +xfs_dir3_data_first_entry_p( + struct xfs_mount *mp, + struct xfs_dir2_data_hdr *hdr) { return (struct xfs_dir2_data_entry *) - ((char *)hdr + xfs_dir3_data_first_offset(hdr)); + ((char *)hdr + xfs_dir3_data_first_offset(mp)); } /* diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 8993ec17452c..8f84153e98a8 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -119,9 +119,9 @@ xfs_dir2_sf_getdents( * mp->m_dirdatablk. */ dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, - XFS_DIR3_DATA_DOT_OFFSET(mp)); + xfs_dir3_data_dot_offset(mp)); dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, - XFS_DIR3_DATA_DOTDOT_OFFSET(mp)); + xfs_dir3_data_dotdot_offset(mp)); /* * Put . entry unless we're starting past it. diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index bb6e2848f473..3ef6d402084c 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c @@ -557,7 +557,7 @@ xfs_dir2_sf_addname_hard( * to insert the new entry. * If it's going to end up at the end then oldsfep will point there. */ - for (offset = XFS_DIR3_DATA_FIRST_OFFSET(mp), + for (offset = xfs_dir3_data_first_offset(mp), oldsfep = xfs_dir2_sf_firstentry(oldsfp), add_datasize = xfs_dir3_data_entsize(mp, args->namelen), eof = (char *)oldsfep == &buf[old_isize]; @@ -640,7 +640,7 @@ xfs_dir2_sf_addname_pick( sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; size = xfs_dir3_data_entsize(mp, args->namelen); - offset = XFS_DIR3_DATA_FIRST_OFFSET(mp); + offset = xfs_dir3_data_first_offset(mp); sfep = xfs_dir2_sf_firstentry(sfp); holefit = 0; /* @@ -713,7 +713,7 @@ xfs_dir2_sf_check( mp = dp->i_mount; sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - offset = XFS_DIR3_DATA_FIRST_OFFSET(mp); + offset = xfs_dir3_data_first_offset(mp); ino = xfs_dir2_sf_get_parent_ino(sfp); i8count = ino > XFS_DIR2_MAX_SHORT_INUM; diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 71520e6e5d65..1ee776d477c3 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -64,7 +64,8 @@ int xfs_dqerror_mod = 33; struct kmem_zone *xfs_qm_dqtrxzone; static struct kmem_zone *xfs_qm_dqzone; -static struct lock_class_key xfs_dquot_other_class; +static struct lock_class_key xfs_dquot_group_class; +static struct lock_class_key xfs_dquot_project_class; /* * This is called to free all the memory associated with a dquot @@ -703,8 +704,20 @@ xfs_qm_dqread( * Make sure group quotas have a different lock class than user * quotas. */ - if (!(type & XFS_DQ_USER)) - lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); + switch (type) { + case XFS_DQ_USER: + /* uses the default lock class */ + break; + case XFS_DQ_GROUP: + lockdep_set_class(&dqp->q_qlock, &xfs_dquot_group_class); + break; + case XFS_DQ_PROJ: + lockdep_set_class(&dqp->q_qlock, &xfs_dquot_project_class); + break; + default: + ASSERT(0); + break; + } XFS_STATS_INC(xs_qm_dquot); diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 1edb5cc3e5f4..18272c766a50 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -515,7 +515,7 @@ typedef struct xfs_swapext /* XFS_IOC_GETBIOSIZE ---- deprecated 47 */ #define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap) #define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64) -#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_eofblocks) +#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_fs_eofblocks) /* * ioctl commands that replace IRIX syssgi()'s diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 193206ba4358..474807a401c8 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -119,11 +119,6 @@ xfs_inode_free( ip->i_itemp = NULL; } - /* asserts to verify all state is correct here */ - ASSERT(atomic_read(&ip->i_pincount) == 0); - ASSERT(!spin_is_locked(&ip->i_flags_lock)); - ASSERT(!xfs_isiflocked(ip)); - /* * Because we use RCU freeing we need to ensure the inode always * appears to be reclaimed with an invalid inode number when in the @@ -135,6 +130,10 @@ xfs_inode_free( ip->i_ino = 0; spin_unlock(&ip->i_flags_lock); + /* asserts to verify all state is correct here */ + ASSERT(atomic_read(&ip->i_pincount) == 0); + ASSERT(!xfs_isiflocked(ip)); + call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); } diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index dabda9521b4b..39797490a1f1 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1585,6 +1585,7 @@ xlog_recover_add_to_trans( "bad number of regions (%d) in inode log format", in_f->ilf_size); ASSERT(0); + kmem_free(ptr); return XFS_ERROR(EIO); } @@ -1970,6 +1971,13 @@ xlog_recover_do_inode_buffer( * magic number. If we don't recognise the magic number in the buffer, then * return a LSN of -1 so that the caller knows it was an unrecognised block and * so can recover the buffer. + * + * Note: we cannot rely solely on magic number matches to determine that the + * buffer has a valid LSN - we also need to verify that it belongs to this + * filesystem, so we need to extract the object's LSN and compare it to that + * which we read from the superblock. If the UUIDs don't match, then we've got a + * stale metadata block from an old filesystem instance that we need to recover + * over the top of. */ static xfs_lsn_t xlog_recover_get_buf_lsn( @@ -1980,6 +1988,8 @@ xlog_recover_get_buf_lsn( __uint16_t magic16; __uint16_t magicda; void *blk = bp->b_addr; + uuid_t *uuid; + xfs_lsn_t lsn = -1; /* v4 filesystems always recover immediately */ if (!xfs_sb_version_hascrc(&mp->m_sb)) @@ -1992,43 +2002,79 @@ xlog_recover_get_buf_lsn( case XFS_ABTB_MAGIC: case XFS_ABTC_MAGIC: case XFS_IBT_CRC_MAGIC: - case XFS_IBT_MAGIC: - return be64_to_cpu( - ((struct xfs_btree_block *)blk)->bb_u.s.bb_lsn); + case XFS_IBT_MAGIC: { + struct xfs_btree_block *btb = blk; + + lsn = be64_to_cpu(btb->bb_u.s.bb_lsn); + uuid = &btb->bb_u.s.bb_uuid; + break; + } case XFS_BMAP_CRC_MAGIC: - case XFS_BMAP_MAGIC: - return be64_to_cpu( - ((struct xfs_btree_block *)blk)->bb_u.l.bb_lsn); + case XFS_BMAP_MAGIC: { + struct xfs_btree_block *btb = blk; + + lsn = be64_to_cpu(btb->bb_u.l.bb_lsn); + uuid = &btb->bb_u.l.bb_uuid; + break; + } case XFS_AGF_MAGIC: - return be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn); + lsn = be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn); + uuid = &((struct xfs_agf *)blk)->agf_uuid; + break; case XFS_AGFL_MAGIC: - return be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn); + lsn = be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn); + uuid = &((struct xfs_agfl *)blk)->agfl_uuid; + break; case XFS_AGI_MAGIC: - return be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn); + lsn = be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn); + uuid = &((struct xfs_agi *)blk)->agi_uuid; + break; case XFS_SYMLINK_MAGIC: - return be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn); + lsn = be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn); + uuid = &((struct xfs_dsymlink_hdr *)blk)->sl_uuid; + break; case XFS_DIR3_BLOCK_MAGIC: case XFS_DIR3_DATA_MAGIC: case XFS_DIR3_FREE_MAGIC: - return be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn); + lsn = be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn); + uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid; + break; case XFS_ATTR3_RMT_MAGIC: - return be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn); + lsn = be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn); + uuid = &((struct xfs_attr3_rmt_hdr *)blk)->rm_uuid; + break; case XFS_SB_MAGIC: - return be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn); + lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn); + uuid = &((struct xfs_dsb *)blk)->sb_uuid; + break; default: break; } + if (lsn != (xfs_lsn_t)-1) { + if (!uuid_equal(&mp->m_sb.sb_uuid, uuid)) + goto recover_immediately; + return lsn; + } + magicda = be16_to_cpu(((struct xfs_da_blkinfo *)blk)->magic); switch (magicda) { case XFS_DIR3_LEAF1_MAGIC: case XFS_DIR3_LEAFN_MAGIC: case XFS_DA3_NODE_MAGIC: - return be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn); + lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn); + uuid = &((struct xfs_da3_blkinfo *)blk)->uuid; + break; default: break; } + if (lsn != (xfs_lsn_t)-1) { + if (!uuid_equal(&mp->m_sb.sb_uuid, uuid)) + goto recover_immediately; + return lsn; + } + /* * We do individual object checks on dquot and inode buffers as they * have their own individual LSN records. Also, we could have a stale diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h index d06079c774a0..99b490b4d05a 100644 --- a/include/asm-generic/hugetlb.h +++ b/include/asm-generic/hugetlb.h @@ -6,12 +6,12 @@ static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot) return mk_pte(page, pgprot); } -static inline int huge_pte_write(pte_t pte) +static inline unsigned long huge_pte_write(pte_t pte) { return pte_write(pte); } -static inline int huge_pte_dirty(pte_t pte) +static inline unsigned long huge_pte_dirty(pte_t pte) { return pte_dirty(pte); } diff --git a/include/asm-generic/vtime.h b/include/asm-generic/vtime.h index e69de29bb2d1..b1a49677fe25 100644 --- a/include/asm-generic/vtime.h +++ b/include/asm-generic/vtime.h @@ -0,0 +1 @@ +/* no content, but patch(1) dislikes empty files */ diff --git a/include/dt-bindings/pinctrl/omap.h b/include/dt-bindings/pinctrl/omap.h index edbd250809cb..bed35e36fd27 100644 --- a/include/dt-bindings/pinctrl/omap.h +++ b/include/dt-bindings/pinctrl/omap.h @@ -23,7 +23,7 @@ #define PULL_UP (1 << 4) #define ALTELECTRICALSEL (1 << 5) -/* 34xx specific mux bit defines */ +/* omap3/4/5 specific mux bit defines */ #define INPUT_EN (1 << 8) #define OFF_EN (1 << 9) #define OFFOUT_EN (1 << 10) @@ -31,8 +31,6 @@ #define OFF_PULL_EN (1 << 12) #define OFF_PULL_UP (1 << 13) #define WAKEUP_EN (1 << 14) - -/* 44xx specific mux bit defines */ #define WAKEUP_EVENT (1 << 15) /* Active pin states */ diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index f7f1d7169b11..089743ade734 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -159,6 +159,26 @@ static inline bool balloon_page_movable(struct page *page) } /* + * isolated_balloon_page - identify an isolated balloon page on private + * compaction/migration page lists. + * + * After a compaction thread isolates a balloon page for migration, it raises + * the page refcount to prevent concurrent compaction threads from re-isolating + * the same page. For that reason putback_movable_pages(), or other routines + * that need to identify isolated balloon pages on private pagelists, cannot + * rely on balloon_page_movable() to accomplish the task. + */ +static inline bool isolated_balloon_page(struct page *page) +{ + /* Already isolated balloon pages, by default, have a raised refcount */ + if (page_flags_cleared(page) && !page_mapped(page) && + page_count(page) >= 2) + return __is_movable_balloon_page(page); + + return false; +} + +/* * balloon_page_insert - insert a page into the balloon's page list and make * the page->mapping assignment accordingly. * @page : page to be assigned as a 'balloon page' @@ -243,6 +263,11 @@ static inline bool balloon_page_movable(struct page *page) return false; } +static inline bool isolated_balloon_page(struct page *page) +{ + return false; +} + static inline bool balloon_page_isolate(struct page *page) { return false; diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index d66033f418c9..0333e605ea0d 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -242,6 +242,7 @@ extern int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc, struct bcma_device *core, bool enable); extern void bcma_core_pci_up(struct bcma_bus *bus); extern void bcma_core_pci_down(struct bcma_bus *bus); +extern void bcma_core_pci_power_save(struct bcma_bus *bus, bool up); extern int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev); extern int bcma_core_pci_plat_dev_init(struct pci_dev *dev); diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 842de225055f..ded429966c1f 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -65,6 +65,21 @@ #define __visible __attribute__((externally_visible)) #endif +/* + * GCC 'asm goto' miscompiles certain code sequences: + * + * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 + * + * Work it around via a compiler barrier quirk suggested by Jakub Jelinek. + * Fixed in GCC 4.8.2 and later versions. + * + * (asm goto is automatically volatile - the naming reflects this.) + */ +#if GCC_VERSION <= 40801 +# define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) +#else +# define asm_volatile_goto(x...) do { asm goto(x); } while (0) +#endif #ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP #if GCC_VERSION >= 40400 diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 653073de09e3..ed419c62dde1 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -406,13 +406,14 @@ int dm_noflush_suspending(struct dm_target *ti); union map_info *dm_get_mapinfo(struct bio *bio); union map_info *dm_get_rq_mapinfo(struct request *rq); +struct queue_limits *dm_get_queue_limits(struct mapped_device *md); + /* * Geometry functions. */ int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo); int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo); - /*----------------------------------------------------------------- * Functions for manipulating device-mapper tables. *---------------------------------------------------------------*/ diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index a3b8b2e2d244..d98503bde7e9 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -30,10 +30,13 @@ /* * Framework version for util services. */ +#define UTIL_FW_MINOR 0 + +#define UTIL_WS2K8_FW_MAJOR 1 +#define UTIL_WS2K8_FW_VERSION (UTIL_WS2K8_FW_MAJOR << 16 | UTIL_FW_MINOR) #define UTIL_FW_MAJOR 3 -#define UTIL_FW_MINOR 0 -#define UTIL_FW_MAJOR_MINOR (UTIL_FW_MAJOR << 16 | UTIL_FW_MINOR) +#define UTIL_FW_VERSION (UTIL_FW_MAJOR << 16 | UTIL_FW_MINOR) /* diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 78e2ada50cd5..d380c5e68008 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -55,7 +55,7 @@ #define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */ #define DMAR_IQ_SHIFT 4 /* Invalidation queue head/tail shift */ #define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ -#define DMAR_ICS_REG 0x98 /* Invalidation complete status register */ +#define DMAR_ICS_REG 0x9c /* Invalidation complete status register */ #define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ #define OFFSET_STRIDE (9) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 482ad2d84a32..672ddc4de4af 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -439,6 +439,17 @@ static inline char *hex_byte_pack(char *buf, u8 byte) return buf; } +extern const char hex_asc_upper[]; +#define hex_asc_upper_lo(x) hex_asc_upper[((x) & 0x0f)] +#define hex_asc_upper_hi(x) hex_asc_upper[((x) & 0xf0) >> 4] + +static inline char *hex_byte_pack_upper(char *buf, u8 byte) +{ + *buf++ = hex_asc_upper_hi(byte); + *buf++ = hex_asc_upper_lo(byte); + return buf; +} + static inline char * __deprecated pack_hex_byte(char *buf, u8 byte) { return hex_byte_pack(buf, byte); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0fbbc7aa02cb..9523d2ad7535 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -142,7 +142,7 @@ struct kvm; struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; -extern raw_spinlock_t kvm_lock; +extern spinlock_t kvm_lock; extern struct list_head vm_list; struct kvm_io_range { @@ -189,8 +189,7 @@ struct kvm_async_pf { gva_t gva; unsigned long addr; struct kvm_arch_async_pf arch; - struct page *page; - bool done; + bool wakeup_all; }; void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); @@ -508,9 +507,10 @@ int kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); int __kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont); -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages); void kvm_arch_memslots_updated(struct kvm *kvm); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, @@ -671,6 +671,25 @@ static inline void kvm_arch_free_vm(struct kvm *kvm) } #endif +#ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA +void kvm_arch_register_noncoherent_dma(struct kvm *kvm); +void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm); +bool kvm_arch_has_noncoherent_dma(struct kvm *kvm); +#else +static inline void kvm_arch_register_noncoherent_dma(struct kvm *kvm) +{ +} + +static inline void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm) +{ +} + +static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) +{ + return false; +} +#endif + static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) { #ifdef __KVM_HAVE_ARCH_WQP @@ -747,9 +766,6 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, int kvm_request_irq_source_id(struct kvm *kvm); void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); -/* For vcpu->arch.iommu_flags */ -#define KVM_IOMMU_CACHE_COHERENCY 0x1 - #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot); @@ -789,7 +805,7 @@ static inline void kvm_guest_enter(void) /* KVM does not hold any references to rcu protected data when it * switches CPU into a guest mode. In fact switching to a guest mode - * is very similar to exiting to userspase from rcu point of view. In + * is very similar to exiting to userspace from rcu point of view. In * addition CPU may stay in a guest mode for quite a long time (up to * one time slice). Lets treat guest mode as quiescent state, just like * we do with user-mode execution. @@ -842,13 +858,6 @@ static inline int memslot_id(struct kvm *kvm, gfn_t gfn) return gfn_to_memslot(kvm, gfn)->id; } -static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) -{ - /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */ - return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - - (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); -} - static inline gfn_t hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot) { @@ -1066,6 +1075,7 @@ struct kvm_device *kvm_device_from_filp(struct file *filp); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; +extern struct kvm_device_ops kvm_vfio_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 60e95872da29..ecc82b37c4cc 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -53,23 +53,6 @@ struct mem_cgroup_reclaim_cookie { unsigned int generation; }; -enum mem_cgroup_filter_t { - VISIT, /* visit current node */ - SKIP, /* skip the current node and continue traversal */ - SKIP_TREE, /* skip the whole subtree and continue traversal */ -}; - -/* - * mem_cgroup_filter_t predicate might instruct mem_cgroup_iter_cond how to - * iterate through the hierarchy tree. Each tree element is checked by the - * predicate before it is returned by the iterator. If a filter returns - * SKIP or SKIP_TREE then the iterator code continues traversal (with the - * next node down the hierarchy or the next node that doesn't belong under the - * memcg's subtree). - */ -typedef enum mem_cgroup_filter_t -(*mem_cgroup_iter_filter)(struct mem_cgroup *memcg, struct mem_cgroup *root); - #ifdef CONFIG_MEMCG /* * All "charge" functions with gfp_mask should use GFP_KERNEL or @@ -137,18 +120,9 @@ mem_cgroup_prepare_migration(struct page *page, struct page *newpage, extern void mem_cgroup_end_migration(struct mem_cgroup *memcg, struct page *oldpage, struct page *newpage, bool migration_ok); -struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root, - struct mem_cgroup *prev, - struct mem_cgroup_reclaim_cookie *reclaim, - mem_cgroup_iter_filter cond); - -static inline struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, - struct mem_cgroup *prev, - struct mem_cgroup_reclaim_cookie *reclaim) -{ - return mem_cgroup_iter_cond(root, prev, reclaim, NULL); -} - +struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, + struct mem_cgroup *, + struct mem_cgroup_reclaim_cookie *); void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); /* @@ -260,9 +234,9 @@ static inline void mem_cgroup_dec_page_stat(struct page *page, mem_cgroup_update_page_stat(page, idx, -1); } -enum mem_cgroup_filter_t -mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg, - struct mem_cgroup *root); +unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, + gfp_t gfp_mask, + unsigned long *total_scanned); void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx); static inline void mem_cgroup_count_vm_event(struct mm_struct *mm, @@ -376,15 +350,6 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg, struct page *oldpage, struct page *newpage, bool migration_ok) { } -static inline struct mem_cgroup * -mem_cgroup_iter_cond(struct mem_cgroup *root, - struct mem_cgroup *prev, - struct mem_cgroup_reclaim_cookie *reclaim, - mem_cgroup_iter_filter cond) -{ - /* first call must return non-NULL, second return NULL */ - return (struct mem_cgroup *)(unsigned long)!prev; -} static inline struct mem_cgroup * mem_cgroup_iter(struct mem_cgroup *root, @@ -471,11 +436,11 @@ static inline void mem_cgroup_dec_page_stat(struct page *page, } static inline -enum mem_cgroup_filter_t -mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg, - struct mem_cgroup *root) +unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, + gfp_t gfp_mask, + unsigned long *total_scanned) { - return VISIT; + return 0; } static inline void mem_cgroup_split_huge_fixup(struct page *head) diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 09c2300ddb37..cb358355ef43 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -45,6 +45,7 @@ #define MAPPER_CTRL_MINOR 236 #define LOOP_CTRL_MINOR 237 #define VHOST_NET_MINOR 238 +#define UHID_MINOR 239 #define MISC_DYNAMIC_MINOR 255 struct device; diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 68029b30c3dc..5eb4e31af22b 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -181,7 +181,7 @@ enum { MLX5_DEV_CAP_FLAG_TLP_HINTS = 1LL << 39, MLX5_DEV_CAP_FLAG_SIG_HAND_OVER = 1LL << 40, MLX5_DEV_CAP_FLAG_DCT = 1LL << 41, - MLX5_DEV_CAP_FLAG_CMDIF_CSUM = 1LL << 46, + MLX5_DEV_CAP_FLAG_CMDIF_CSUM = 3LL << 46, }; enum { @@ -417,7 +417,7 @@ struct mlx5_init_seg { struct health_buffer health; __be32 rsvd2[884]; __be32 health_counter; - __be32 rsvd3[1023]; + __be32 rsvd3[1019]; __be64 ieee1588_clk; __be32 ieee1588_clk_type; __be32 clr_intx; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8888381fc150..6b8c496572c8 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -82,7 +82,7 @@ enum { }; enum { - MLX5_MAX_EQ_NAME = 20 + MLX5_MAX_EQ_NAME = 32 }; enum { @@ -747,8 +747,7 @@ static inline u32 mlx5_idx_to_mkey(u32 mkey_idx) enum { MLX5_PROF_MASK_QP_SIZE = (u64)1 << 0, - MLX5_PROF_MASK_CMDIF_CSUM = (u64)1 << 1, - MLX5_PROF_MASK_MR_CACHE = (u64)1 << 2, + MLX5_PROF_MASK_MR_CACHE = (u64)1 << 1, }; enum { @@ -758,7 +757,6 @@ enum { struct mlx5_profile { u64 mask; u32 log_max_qp; - int cmdif_csum; struct { int size; int limit; diff --git a/include/linux/mutex.h b/include/linux/mutex.h index ccd4260834c5..bab49da8a0f0 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -15,8 +15,8 @@ #include <linux/spinlock_types.h> #include <linux/linkage.h> #include <linux/lockdep.h> - #include <linux/atomic.h> +#include <asm/processor.h> /* * Simple, straightforward mutexes with strict semantics: @@ -175,8 +175,8 @@ extern void mutex_unlock(struct mutex *lock); extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); -#ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX -#define arch_mutex_cpu_relax() cpu_relax() +#ifndef arch_mutex_cpu_relax +# define arch_mutex_cpu_relax() cpu_relax() #endif #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 01fd84b566f7..49f52c8f4422 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1455,7 +1455,8 @@ struct nfs_rpc_ops { struct inode * (*open_context) (struct inode *dir, struct nfs_open_context *ctx, int open_flags, - struct iattr *iattr); + struct iattr *iattr, + int *); int (*have_delegation)(struct inode *, fmode_t); int (*return_delegation)(struct inode *); struct nfs_client *(*alloc_client) (const struct nfs_client_initdata *); diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 535cecf1e02f..fcd63baee5f2 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -1,8 +1,6 @@ #ifndef __OF_IRQ_H #define __OF_IRQ_H -#if defined(CONFIG_OF) -struct of_irq; #include <linux/types.h> #include <linux/errno.h> #include <linux/irq.h> @@ -10,14 +8,6 @@ struct of_irq; #include <linux/ioport.h> #include <linux/of.h> -/* - * irq_of_parse_and_map() is used by all OF enabled platforms; but SPARC - * implements it differently. However, the prototype is the same for all, - * so declare it here regardless of the CONFIG_OF_IRQ setting. - */ -extern unsigned int irq_of_parse_and_map(struct device_node *node, int index); - -#if defined(CONFIG_OF_IRQ) /** * of_irq - container for device_node/irq_specifier pair for an irq controller * @controller: pointer to interrupt controller device tree node @@ -71,11 +61,17 @@ extern int of_irq_to_resource(struct device_node *dev, int index, extern int of_irq_count(struct device_node *dev); extern int of_irq_to_resource_table(struct device_node *dev, struct resource *res, int nr_irqs); -extern struct device_node *of_irq_find_parent(struct device_node *child); extern void of_irq_init(const struct of_device_id *matches); -#endif /* CONFIG_OF_IRQ */ +#if defined(CONFIG_OF) +/* + * irq_of_parse_and_map() is used by all OF enabled platforms; but SPARC + * implements it differently. However, the prototype is the same for all, + * so declare it here regardless of the CONFIG_OF_IRQ setting. + */ +extern unsigned int irq_of_parse_and_map(struct device_node *node, int index); +extern struct device_node *of_irq_find_parent(struct device_node *child); #else /* !CONFIG_OF */ static inline unsigned int irq_of_parse_and_map(struct device_node *dev, diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h deleted file mode 100644 index c84128255814..000000000000 --- a/include/linux/of_reserved_mem.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef __OF_RESERVED_MEM_H -#define __OF_RESERVED_MEM_H - -#ifdef CONFIG_OF_RESERVED_MEM -void of_reserved_mem_device_init(struct device *dev); -void of_reserved_mem_device_release(struct device *dev); -void early_init_dt_scan_reserved_mem(void); -#else -static inline void of_reserved_mem_device_init(struct device *dev) { } -static inline void of_reserved_mem_device_release(struct device *dev) { } -static inline void early_init_dt_scan_reserved_mem(void) { } -#endif - -#endif /* __OF_RESERVED_MEM_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 866e85c5eb94..c8ba627c1d60 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -294,9 +294,31 @@ struct ring_buffer; */ struct perf_event { #ifdef CONFIG_PERF_EVENTS - struct list_head group_entry; + /* + * entry onto perf_event_context::event_list; + * modifications require ctx->lock + * RCU safe iterations. + */ struct list_head event_entry; + + /* + * XXX: group_entry and sibling_list should be mutually exclusive; + * either you're a sibling on a group, or you're the group leader. + * Rework the code to always use the same list element. + * + * Locked for modification by both ctx->mutex and ctx->lock; holding + * either sufficies for read. + */ + struct list_head group_entry; struct list_head sibling_list; + + /* + * We need storage to track the entries in perf_pmu_migrate_context; we + * cannot use the event_entry because of RCU and we want to keep the + * group in tact which avoids us using the other two entries. + */ + struct list_head migrate_entry; + struct hlist_node hlist_entry; int nr_siblings; int group_flags; diff --git a/include/linux/random.h b/include/linux/random.h index 3b9377d6b7a5..6312dd9ba449 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -17,6 +17,7 @@ extern void add_interrupt_randomness(int irq, int irq_flags); extern void get_random_bytes(void *buf, int nbytes); extern void get_random_bytes_arch(void *buf, int nbytes); void generate_random_uuid(unsigned char uuid_out[16]); +extern int random_int_secret_init(void); #ifndef MODULE extern const struct file_operations random_fops, urandom_fops; diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 67e13aa5a478..9bdad43ad228 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -40,6 +40,8 @@ enum regulator_status { }; /** + * struct regulator_linear_range - specify linear voltage ranges + * * Specify a range of voltages for regulator_map_linar_range() and * regulator_list_linear_range(). * diff --git a/include/linux/sched.h b/include/linux/sched.h index 6682da36b293..7bb4b4a2a101 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -285,6 +285,14 @@ static inline void lockup_detector_init(void) } #endif +#ifdef CONFIG_DETECT_HUNG_TASK +void reset_hung_task_detector(void); +#else +static inline void reset_hung_task_detector(void) +{ +} +#endif + /* Attach to any functions which should be ignored in wchan output. */ #define __sched __attribute__((__section__(".sched.text"))) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2ddb48d9312c..c2d89335f637 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -498,7 +498,7 @@ struct sk_buff { * headers if needed */ __u8 encapsulation:1; - /* 7/9 bit hole (depending on ndisc_nodetype presence) */ + /* 6/8 bit hole (depending on ndisc_nodetype presence) */ kmemcheck_bitfield_end(flags2); #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL diff --git a/include/linux/smp.h b/include/linux/smp.h index cfb7ca094b38..731f5237d5f4 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -155,6 +155,12 @@ smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, static inline void kick_all_cpus_sync(void) { } +static inline void __smp_call_function_single(int cpuid, + struct call_single_data *data, int wait) +{ + on_each_cpu(data->func, data->info, wait); +} + #endif /* !SMP */ /* diff --git a/include/linux/srcu.h b/include/linux/srcu.h index c114614ed172..9b058eecd403 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -237,4 +237,18 @@ static inline void srcu_read_unlock(struct srcu_struct *sp, int idx) __srcu_read_unlock(sp, idx); } +/** + * smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock + * + * Converts the preceding srcu_read_unlock into a two-way memory barrier. + * + * Call this after srcu_read_unlock, to guarantee that all memory operations + * that occur after smp_mb__after_srcu_read_unlock will appear to happen after + * the preceding srcu_read_unlock. + */ +static inline void smp_mb__after_srcu_read_unlock(void) +{ + /* __srcu_read_unlock has smp_mb() internally so nothing to do here. */ +} + #endif diff --git a/include/linux/timex.h b/include/linux/timex.h index dd3edd7dfc94..9d3f1a5b6178 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -64,6 +64,20 @@ #include <asm/timex.h> +#ifndef random_get_entropy +/* + * The random_get_entropy() function is used by the /dev/random driver + * in order to extract entropy via the relative unpredictability of + * when an interrupt takes places versus a high speed, fine-grained + * timing source or cycle counter. Since it will be occurred on every + * single interrupt, it must have a very low cost/overhead. + * + * By default we use get_cycles() for this purpose, but individual + * architectures may override this in their asm/timex.h header file. + */ +#define random_get_entropy() get_cycles() +#endif + /* * SHIFT_PLL is used as a dampening factor to define how much we * adjust the frequency correction for a given offset in PLL mode. diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 9cb2fe8ca944..e303eef94dd5 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -42,6 +42,7 @@ struct usbnet { struct usb_host_endpoint *status; unsigned maxpacket; struct timer_list delay; + const char *padding_pkt; /* protocol/interface state */ struct net_device *net; diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index 80cf8173a65b..2c02f3a8d2ba 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -65,15 +65,8 @@ struct pci_dev; * out of the arbitration process (and can be safe to take * interrupts at any time. */ -#if defined(CONFIG_VGA_ARB) extern void vga_set_legacy_decoding(struct pci_dev *pdev, unsigned int decodes); -#else -static inline void vga_set_legacy_decoding(struct pci_dev *pdev, - unsigned int decodes) -{ -} -#endif /** * vga_get - acquire & locks VGA resources diff --git a/include/net/addrconf.h b/include/net/addrconf.h index fb314de2b61b..86505bfa5d2c 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -67,6 +67,10 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr); #endif +bool ipv6_chk_custom_prefix(const struct in6_addr *addr, + const unsigned int prefix_len, + struct net_device *dev); + int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev); struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index aaeaf0938ec0..15f10841e2b5 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -104,6 +104,7 @@ enum { enum { HCI_SETUP, HCI_AUTO_OFF, + HCI_RFKILLED, HCI_MGMT, HCI_PAIRABLE, HCI_SERVICE_CACHE, diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index f0d70f066f3d..9c4d37ec45a1 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -723,8 +723,6 @@ struct ip_vs_dest_dst { struct rcu_head rcu_head; }; -/* In grace period after removing */ -#define IP_VS_DEST_STATE_REMOVING 0x01 /* * The real server destination forwarding entry * with ip address, port number, and so on. @@ -742,7 +740,7 @@ struct ip_vs_dest { atomic_t refcnt; /* reference counter */ struct ip_vs_stats stats; /* statistics */ - unsigned long state; /* state flags */ + unsigned long idle_start; /* start time, jiffies */ /* connection counters and thresholds */ atomic_t activeconns; /* active connections */ @@ -756,14 +754,13 @@ struct ip_vs_dest { struct ip_vs_dest_dst __rcu *dest_dst; /* cached dst info */ /* for virtual service */ - struct ip_vs_service *svc; /* service it belongs to */ + struct ip_vs_service __rcu *svc; /* service it belongs to */ __u16 protocol; /* which protocol (TCP/UDP) */ __be16 vport; /* virtual port number */ union nf_inet_addr vaddr; /* virtual IP address */ __u32 vfwmark; /* firewall mark of service */ struct list_head t_list; /* in dest_trash */ - struct rcu_head rcu_head; unsigned int in_rs_table:1; /* we are in rs_table */ }; @@ -1649,7 +1646,7 @@ static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) /* CONFIG_IP_VS_NFCT */ #endif -static inline unsigned int +static inline int ip_vs_dest_conn_overhead(struct ip_vs_dest *dest) { /* diff --git a/include/net/mrp.h b/include/net/mrp.h index 4fbf02aa2ec1..0f7558b638ae 100644 --- a/include/net/mrp.h +++ b/include/net/mrp.h @@ -112,6 +112,7 @@ struct mrp_applicant { struct mrp_application *app; struct net_device *dev; struct timer_list join_timer; + struct timer_list periodic_timer; spinlock_t lock; struct sk_buff_head queue; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 1313456a0994..9d22f08896c6 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -74,6 +74,7 @@ struct net { struct hlist_head *dev_index_head; unsigned int dev_base_seq; /* protected by rtnl_mutex */ int ifindex; + unsigned int dev_unreg_count; /* core fib_rules */ struct list_head rules_ops; diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h index 806f54a290d6..f572f313d6f1 100644 --- a/include/net/netfilter/nf_conntrack_synproxy.h +++ b/include/net/netfilter/nf_conntrack_synproxy.h @@ -56,7 +56,7 @@ struct synproxy_options { struct tcphdr; struct xt_synproxy_info; -extern void synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, +extern bool synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, const struct tcphdr *th, struct synproxy_options *opts); extern unsigned int synproxy_options_size(const struct synproxy_options *opts); diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index 6ca975bebd37..c2e542b27a5a 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -3,7 +3,6 @@ #include <linux/types.h> -extern void net_secret_init(void); extern __u32 secure_ip_id(__be32 daddr); extern __u32 secure_ipv6_id(const __be32 daddr[4]); extern u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); diff --git a/include/net/sock.h b/include/net/sock.h index 6ba2e7b0e2b1..1d37a8086bed 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -409,6 +409,11 @@ struct sock { void (*sk_destruct)(struct sock *sk); }; +#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) + +#define rcu_dereference_sk_user_data(sk) rcu_dereference(__sk_user_data((sk))) +#define rcu_assign_sk_user_data(sk, ptr) rcu_assign_pointer(__sk_user_data((sk)), ptr) + /* * SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK * or not whether his port will be reused by someone else. SK_FORCE_REUSE diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 7005d1109ec9..131a0bda7aec 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -296,23 +296,21 @@ DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_ready, TRACE_EVENT( kvm_async_pf_completed, - TP_PROTO(unsigned long address, struct page *page, u64 gva), - TP_ARGS(address, page, gva), + TP_PROTO(unsigned long address, u64 gva), + TP_ARGS(address, gva), TP_STRUCT__entry( __field(unsigned long, address) - __field(pfn_t, pfn) __field(u64, gva) ), TP_fast_assign( __entry->address = address; - __entry->pfn = page ? page_to_pfn(page) : 0; __entry->gva = gva; ), - TP_printk("gva %#llx address %#lx pfn %#llx", __entry->gva, - __entry->address, __entry->pfn) + TP_printk("gva %#llx address %#lx", __entry->gva, + __entry->address) ); #endif diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index fa8b3adf9ffb..46d41e8b0dcc 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -1007,4 +1007,6 @@ struct drm_radeon_info { #define SI_TILE_MODE_DEPTH_STENCIL_2D_4AA 3 #define SI_TILE_MODE_DEPTH_STENCIL_2D_8AA 2 +#define CIK_TILE_MODE_DEPTH_STENCIL_1D 5 + #endif diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 99c25338ede8..902f12461873 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -518,6 +518,10 @@ struct kvm_ppc_smmu_info { /* machine type bits, to be used as argument to KVM_CREATE_VM */ #define KVM_VM_S390_UCONTROL 1 +/* on ppc, 0 indicate default, 1 should force HV and 2 PR */ +#define KVM_VM_PPC_HV 1 +#define KVM_VM_PPC_PR 2 + #define KVM_S390_SIE_PAGE_OFFSET 1 /* @@ -541,6 +545,7 @@ struct kvm_ppc_smmu_info { #define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06 #define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 #define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 +#define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2) /* * Extension capability list. @@ -668,6 +673,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_IRQ_XICS 92 #define KVM_CAP_ARM_EL1_32BIT 93 #define KVM_CAP_SPAPR_MULTITCE 94 +#define KVM_CAP_EXT_EMUL_CPUID 95 #ifdef KVM_CAP_IRQ_ROUTING @@ -843,6 +849,10 @@ struct kvm_device_attr { #define KVM_DEV_TYPE_FSL_MPIC_20 1 #define KVM_DEV_TYPE_FSL_MPIC_42 2 #define KVM_DEV_TYPE_XICS 3 +#define KVM_DEV_TYPE_VFIO 4 +#define KVM_DEV_VFIO_GROUP 1 +#define KVM_DEV_VFIO_GROUP_ADD 1 +#define KVM_DEV_VFIO_GROUP_DEL 2 /* * ioctls for VM fds @@ -1012,6 +1022,7 @@ struct kvm_s390_ucas_mapping { /* VM is being stopped by host */ #define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad) #define KVM_ARM_VCPU_INIT _IOW(KVMIO, 0xae, struct kvm_vcpu_init) +#define KVM_ARM_PREFERRED_TARGET _IOR(KVMIO, 0xaf, struct kvm_vcpu_init) #define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 40a1fb807396..009a655a5d35 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -380,10 +380,13 @@ struct perf_event_mmap_page { union { __u64 capabilities; struct { - __u64 cap_usr_time : 1, - cap_usr_rdpmc : 1, - cap_usr_time_zero : 1, - cap_____res : 61; + __u64 cap_bit0 : 1, /* Always 0, deprecated, see commit 860f085b74e9 */ + cap_bit0_is_deprecated : 1, /* Always 1, signals that bit 0 is zero */ + + cap_user_rdpmc : 1, /* The RDPMC instruction can be used to read counts */ + cap_user_time : 1, /* The time_* fields are used */ + cap_user_time_zero : 1, /* The time_zero field is used */ + cap_____res : 59; }; }; @@ -442,12 +445,13 @@ struct perf_event_mmap_page { * ((rem * time_mult) >> time_shift); */ __u64 time_zero; + __u32 size; /* Header size up to __reserved[] fields. */ /* * Hole for extension of the self monitor capabilities */ - __u64 __reserved[119]; /* align to 1k */ + __u8 __reserved[118*8+4]; /* align to 1k. */ /* * Control data for the mmap() data buffer. @@ -528,6 +532,7 @@ enum perf_event_type { * u64 len; * u64 pgoff; * char filename[]; + * struct sample_id sample_id; * }; */ PERF_RECORD_MMAP = 1, diff --git a/init/main.c b/init/main.c index af310afbef28..63d3e8f2970c 100644 --- a/init/main.c +++ b/init/main.c @@ -76,6 +76,7 @@ #include <linux/elevator.h> #include <linux/sched_clock.h> #include <linux/context_tracking.h> +#include <linux/random.h> #include <asm/io.h> #include <asm/bugs.h> @@ -780,6 +781,7 @@ static void __init do_basic_setup(void) do_ctors(); usermodehelper_enable(); do_initcalls(); + random_int_secret_init(); } static void __init do_pre_smp_initcalls(void) diff --git a/ipc/msg.c b/ipc/msg.c index b0d541d42677..558aa91186b6 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -165,6 +165,15 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s) ipc_rmid(&msg_ids(ns), &s->q_perm); } +static void msg_rcu_free(struct rcu_head *head) +{ + struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); + struct msg_queue *msq = ipc_rcu_to_struct(p); + + security_msg_queue_free(msq); + ipc_rcu_free(head); +} + /** * newque - Create a new msg queue * @ns: namespace @@ -189,15 +198,14 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) msq->q_perm.security = NULL; retval = security_msg_queue_alloc(msq); if (retval) { - ipc_rcu_putref(msq); + ipc_rcu_putref(msq, ipc_rcu_free); return retval; } /* ipc_addid() locks msq upon success. */ id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); if (id < 0) { - security_msg_queue_free(msq); - ipc_rcu_putref(msq); + ipc_rcu_putref(msq, msg_rcu_free); return id; } @@ -276,8 +284,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) free_msg(msg); } atomic_sub(msq->q_cbytes, &ns->msg_bytes); - security_msg_queue_free(msq); - ipc_rcu_putref(msq); + ipc_rcu_putref(msq, msg_rcu_free); } /* @@ -688,6 +695,12 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, if (ipcperms(ns, &msq->q_perm, S_IWUGO)) goto out_unlock0; + /* raced with RMID? */ + if (msq->q_perm.deleted) { + err = -EIDRM; + goto out_unlock0; + } + err = security_msg_queue_msgsnd(msq, msg, msgflg); if (err) goto out_unlock0; @@ -717,7 +730,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, rcu_read_lock(); ipc_lock_object(&msq->q_perm); - ipc_rcu_putref(msq); + ipc_rcu_putref(msq, ipc_rcu_free); if (msq->q_perm.deleted) { err = -EIDRM; goto out_unlock0; @@ -894,6 +907,13 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl goto out_unlock1; ipc_lock_object(&msq->q_perm); + + /* raced with RMID? */ + if (msq->q_perm.deleted) { + msg = ERR_PTR(-EIDRM); + goto out_unlock0; + } + msg = find_msg(msq, &msgtyp, mode); if (!IS_ERR(msg)) { /* diff --git a/ipc/sem.c b/ipc/sem.c index 69b6a21f3844..8c4f59b0204a 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -243,71 +243,122 @@ static void merge_queues(struct sem_array *sma) } } +static void sem_rcu_free(struct rcu_head *head) +{ + struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); + struct sem_array *sma = ipc_rcu_to_struct(p); + + security_sem_free(sma); + ipc_rcu_free(head); +} + +/* + * Wait until all currently ongoing simple ops have completed. + * Caller must own sem_perm.lock. + * New simple ops cannot start, because simple ops first check + * that sem_perm.lock is free. + * that a) sem_perm.lock is free and b) complex_count is 0. + */ +static void sem_wait_array(struct sem_array *sma) +{ + int i; + struct sem *sem; + + if (sma->complex_count) { + /* The thread that increased sma->complex_count waited on + * all sem->lock locks. Thus we don't need to wait again. + */ + return; + } + + for (i = 0; i < sma->sem_nsems; i++) { + sem = sma->sem_base + i; + spin_unlock_wait(&sem->lock); + } +} + /* * If the request contains only one semaphore operation, and there are * no complex transactions pending, lock only the semaphore involved. * Otherwise, lock the entire semaphore array, since we either have * multiple semaphores in our own semops, or we need to look at * semaphores from other pending complex operations. - * - * Carefully guard against sma->complex_count changing between zero - * and non-zero while we are spinning for the lock. The value of - * sma->complex_count cannot change while we are holding the lock, - * so sem_unlock should be fine. - * - * The global lock path checks that all the local locks have been released, - * checking each local lock once. This means that the local lock paths - * cannot start their critical sections while the global lock is held. */ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, int nsops) { - int locknum; - again: - if (nsops == 1 && !sma->complex_count) { - struct sem *sem = sma->sem_base + sops->sem_num; + struct sem *sem; - /* Lock just the semaphore we are interested in. */ - spin_lock(&sem->lock); + if (nsops != 1) { + /* Complex operation - acquire a full lock */ + ipc_lock_object(&sma->sem_perm); - /* - * If sma->complex_count was set while we were spinning, - * we may need to look at things we did not lock here. + /* And wait until all simple ops that are processed + * right now have dropped their locks. */ - if (unlikely(sma->complex_count)) { - spin_unlock(&sem->lock); - goto lock_array; - } + sem_wait_array(sma); + return -1; + } + + /* + * Only one semaphore affected - try to optimize locking. + * The rules are: + * - optimized locking is possible if no complex operation + * is either enqueued or processed right now. + * - The test for enqueued complex ops is simple: + * sma->complex_count != 0 + * - Testing for complex ops that are processed right now is + * a bit more difficult. Complex ops acquire the full lock + * and first wait that the running simple ops have completed. + * (see above) + * Thus: If we own a simple lock and the global lock is free + * and complex_count is now 0, then it will stay 0 and + * thus just locking sem->lock is sufficient. + */ + sem = sma->sem_base + sops->sem_num; + if (sma->complex_count == 0) { /* - * Another process is holding the global lock on the - * sem_array; we cannot enter our critical section, - * but have to wait for the global lock to be released. + * It appears that no complex operation is around. + * Acquire the per-semaphore lock. */ - if (unlikely(spin_is_locked(&sma->sem_perm.lock))) { - spin_unlock(&sem->lock); - spin_unlock_wait(&sma->sem_perm.lock); - goto again; + spin_lock(&sem->lock); + + /* Then check that the global lock is free */ + if (!spin_is_locked(&sma->sem_perm.lock)) { + /* spin_is_locked() is not a memory barrier */ + smp_mb(); + + /* Now repeat the test of complex_count: + * It can't change anymore until we drop sem->lock. + * Thus: if is now 0, then it will stay 0. + */ + if (sma->complex_count == 0) { + /* fast path successful! */ + return sops->sem_num; + } } + spin_unlock(&sem->lock); + } - locknum = sops->sem_num; + /* slow path: acquire the full lock */ + ipc_lock_object(&sma->sem_perm); + + if (sma->complex_count == 0) { + /* False alarm: + * There is no complex operation, thus we can switch + * back to the fast path. + */ + spin_lock(&sem->lock); + ipc_unlock_object(&sma->sem_perm); + return sops->sem_num; } else { - int i; - /* - * Lock the semaphore array, and wait for all of the - * individual semaphore locks to go away. The code - * above ensures no new single-lock holders will enter - * their critical section while the array lock is held. + /* Not a false alarm, thus complete the sequence for a + * full lock. */ - lock_array: - ipc_lock_object(&sma->sem_perm); - for (i = 0; i < sma->sem_nsems; i++) { - struct sem *sem = sma->sem_base + i; - spin_unlock_wait(&sem->lock); - } - locknum = -1; + sem_wait_array(sma); + return -1; } - return locknum; } static inline void sem_unlock(struct sem_array *sma, int locknum) @@ -374,12 +425,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns static inline void sem_lock_and_putref(struct sem_array *sma) { sem_lock(sma, NULL, -1); - ipc_rcu_putref(sma); -} - -static inline void sem_putref(struct sem_array *sma) -{ - ipc_rcu_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); } static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) @@ -458,14 +504,13 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) sma->sem_perm.security = NULL; retval = security_sem_alloc(sma); if (retval) { - ipc_rcu_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); return retval; } id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); if (id < 0) { - security_sem_free(sma); - ipc_rcu_putref(sma); + ipc_rcu_putref(sma, sem_rcu_free); return id; } ns->used_sems += nsems; @@ -873,6 +918,24 @@ again: } /** + * set_semotime(sma, sops) - set sem_otime + * @sma: semaphore array + * @sops: operations that modified the array, may be NULL + * + * sem_otime is replicated to avoid cache line trashing. + * This function sets one instance to the current time. + */ +static void set_semotime(struct sem_array *sma, struct sembuf *sops) +{ + if (sops == NULL) { + sma->sem_base[0].sem_otime = get_seconds(); + } else { + sma->sem_base[sops[0].sem_num].sem_otime = + get_seconds(); + } +} + +/** * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue * @sma: semaphore array * @sops: operations that were performed @@ -922,17 +985,10 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop } } } - if (otime) { - if (sops == NULL) { - sma->sem_base[0].sem_otime = get_seconds(); - } else { - sma->sem_base[sops[0].sem_num].sem_otime = - get_seconds(); - } - } + if (otime) + set_semotime(sma, sops); } - /* The following counts are associated to each semaphore: * semncnt number of tasks waiting on semval being nonzero * semzcnt number of tasks waiting on semval being zero @@ -1047,8 +1103,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) wake_up_sem_queue_do(&tasks); ns->used_sems -= sma->sem_nsems; - security_sem_free(sma); - ipc_rcu_putref(sma); + ipc_rcu_putref(sma, sem_rcu_free); } static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) @@ -1292,7 +1347,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, rcu_read_unlock(); sem_io = ipc_alloc(sizeof(ushort)*nsems); if(sem_io == NULL) { - sem_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); return -ENOMEM; } @@ -1328,20 +1383,20 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, if(nsems > SEMMSL_FAST) { sem_io = ipc_alloc(sizeof(ushort)*nsems); if(sem_io == NULL) { - sem_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); return -ENOMEM; } } if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) { - sem_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); err = -EFAULT; goto out_free; } for (i = 0; i < nsems; i++) { if (sem_io[i] > SEMVMX) { - sem_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); err = -ERANGE; goto out_free; } @@ -1629,7 +1684,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) /* step 2: allocate new undo structure */ new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); if (!new) { - sem_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); return ERR_PTR(-ENOMEM); } @@ -1795,12 +1850,17 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, error = perform_atomic_semop(sma, sops, nsops, un, task_tgid_vnr(current)); - if (error <= 0) { - if (alter && error == 0) + if (error == 0) { + /* If the operation was successful, then do + * the required updates. + */ + if (alter) do_smart_update(sma, sops, nsops, 1, &tasks); - - goto out_unlock_free; + else + set_semotime(sma, sops); } + if (error <= 0) + goto out_unlock_free; /* We need to sleep on this operation, so we put the current * task into the pending queue and go to sleep. @@ -2059,6 +2119,14 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it) struct sem_array *sma = it; time_t sem_otime; + /* + * The proc interface isn't aware of sem_lock(), it calls + * ipc_lock_object() directly (in sysvipc_find_ipc). + * In order to stay compatible with sem_lock(), we must wait until + * all simple semop() calls have left their critical regions. + */ + sem_wait_array(sma); + sem_otime = get_semotime(sma); return seq_printf(s, diff --git a/ipc/shm.c b/ipc/shm.c index 2821cdf93adb..d69739610fd4 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -167,6 +167,15 @@ static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp) ipc_lock_object(&ipcp->shm_perm); } +static void shm_rcu_free(struct rcu_head *head) +{ + struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); + struct shmid_kernel *shp = ipc_rcu_to_struct(p); + + security_shm_free(shp); + ipc_rcu_free(head); +} + static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) { ipc_rmid(&shm_ids(ns), &s->shm_perm); @@ -208,8 +217,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) user_shm_unlock(file_inode(shp->shm_file)->i_size, shp->mlock_user); fput (shp->shm_file); - security_shm_free(shp); - ipc_rcu_putref(shp); + ipc_rcu_putref(shp, shm_rcu_free); } /* @@ -497,7 +505,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) shp->shm_perm.security = NULL; error = security_shm_alloc(shp); if (error) { - ipc_rcu_putref(shp); + ipc_rcu_putref(shp, ipc_rcu_free); return error; } @@ -566,8 +574,7 @@ no_id: user_shm_unlock(size, shp->mlock_user); fput(file); no_file: - security_shm_free(shp); - ipc_rcu_putref(shp); + ipc_rcu_putref(shp, shm_rcu_free); return error; } diff --git a/ipc/util.c b/ipc/util.c index e829da9ed01f..fdb8ae740775 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -474,11 +474,6 @@ void ipc_free(void* ptr, int size) kfree(ptr); } -struct ipc_rcu { - struct rcu_head rcu; - atomic_t refcount; -} ____cacheline_aligned_in_smp; - /** * ipc_rcu_alloc - allocate ipc and rcu space * @size: size desired @@ -505,27 +500,24 @@ int ipc_rcu_getref(void *ptr) return atomic_inc_not_zero(&p->refcount); } -/** - * ipc_schedule_free - free ipc + rcu space - * @head: RCU callback structure for queued work - */ -static void ipc_schedule_free(struct rcu_head *head) -{ - vfree(container_of(head, struct ipc_rcu, rcu)); -} - -void ipc_rcu_putref(void *ptr) +void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head)) { struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1; if (!atomic_dec_and_test(&p->refcount)) return; - if (is_vmalloc_addr(ptr)) { - call_rcu(&p->rcu, ipc_schedule_free); - } else { - kfree_rcu(p, rcu); - } + call_rcu(&p->rcu, func); +} + +void ipc_rcu_free(struct rcu_head *head) +{ + struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); + + if (is_vmalloc_addr(p)) + vfree(p); + else + kfree(p); } /** diff --git a/ipc/util.h b/ipc/util.h index c5f3338ba1fa..f2f5036f2eed 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -47,6 +47,13 @@ static inline void msg_exit_ns(struct ipc_namespace *ns) { } static inline void shm_exit_ns(struct ipc_namespace *ns) { } #endif +struct ipc_rcu { + struct rcu_head rcu; + atomic_t refcount; +} ____cacheline_aligned_in_smp; + +#define ipc_rcu_to_struct(p) ((void *)(p+1)) + /* * Structure that holds the parameters needed by the ipc operations * (see after) @@ -120,7 +127,8 @@ void ipc_free(void* ptr, int size); */ void* ipc_rcu_alloc(int size); int ipc_rcu_getref(void *ptr); -void ipc_rcu_putref(void *ptr); +void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head)); +void ipc_rcu_free(struct rcu_head *head); struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int); struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id); diff --git a/kernel/audit.c b/kernel/audit.c index 91e53d04b6a9..7b0e23a740ce 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1117,9 +1117,10 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, sleep_time = timeout_start + audit_backlog_wait_time - jiffies; - if ((long)sleep_time > 0) + if ((long)sleep_time > 0) { wait_for_auditd(sleep_time); - continue; + continue; + } } if (audit_rate_check() && printk_ratelimit()) printk(KERN_WARNING diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 247091bf0587..859c8dfd78a1 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -51,6 +51,15 @@ void context_tracking_user_enter(void) unsigned long flags; /* + * Repeat the user_enter() check here because some archs may be calling + * this from asm and if no CPU needs context tracking, they shouldn't + * go further. Repeat the check here until they support the static key + * check. + */ + if (!static_key_false(&context_tracking_enabled)) + return; + + /* * Some contexts may involve an exception occuring in an irq, * leading to that nesting: * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() @@ -151,6 +160,9 @@ void context_tracking_user_exit(void) { unsigned long flags; + if (!static_key_false(&context_tracking_enabled)) + return; + if (in_interrupt()) return; diff --git a/kernel/events/core.c b/kernel/events/core.c index dd236b66ca3a..d49a9d29334c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3660,6 +3660,26 @@ static void calc_timer_values(struct perf_event *event, *running = ctx_time - event->tstamp_running; } +static void perf_event_init_userpage(struct perf_event *event) +{ + struct perf_event_mmap_page *userpg; + struct ring_buffer *rb; + + rcu_read_lock(); + rb = rcu_dereference(event->rb); + if (!rb) + goto unlock; + + userpg = rb->user_page; + + /* Allow new userspace to detect that bit 0 is deprecated */ + userpg->cap_bit0_is_deprecated = 1; + userpg->size = offsetof(struct perf_event_mmap_page, __reserved); + +unlock: + rcu_read_unlock(); +} + void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) { } @@ -4044,6 +4064,7 @@ again: ring_buffer_attach(event, rb); rcu_assign_pointer(event->rb, rb); + perf_event_init_userpage(event); perf_event_update_userpage(event); unlock: @@ -7213,15 +7234,15 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) perf_remove_from_context(event); unaccount_event_cpu(event, src_cpu); put_ctx(src_ctx); - list_add(&event->event_entry, &events); + list_add(&event->migrate_entry, &events); } mutex_unlock(&src_ctx->mutex); synchronize_rcu(); mutex_lock(&dst_ctx->mutex); - list_for_each_entry_safe(event, tmp, &events, event_entry) { - list_del(&event->event_entry); + list_for_each_entry_safe(event, tmp, &events, migrate_entry) { + list_del(&event->migrate_entry); if (event->state >= PERF_EVENT_STATE_OFF) event->state = PERF_EVENT_STATE_INACTIVE; account_event_cpu(event, dst_cpu); diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 3e97fb126e6b..dfdf51534b3e 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -203,6 +203,14 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, return ret; } +static atomic_t reset_hung_task = ATOMIC_INIT(0); + +void reset_hung_task_detector(void) +{ + atomic_set(&reset_hung_task, 1); +} +EXPORT_SYMBOL_GPL(reset_hung_task_detector); + /* * kthread which checks for tasks stuck in D state */ @@ -216,6 +224,9 @@ static int watchdog(void *dummy) while (schedule_timeout_interruptible(timeout_jiffies(timeout))) timeout = sysctl_hung_task_timeout_secs; + if (atomic_xchg(&reset_hung_task, 0)) + continue; + check_hung_uninterruptible_tasks(timeout); } diff --git a/kernel/kmod.c b/kernel/kmod.c index fb326365b694..b086006c59e7 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -571,6 +571,10 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) DECLARE_COMPLETION_ONSTACK(done); int retval = 0; + if (!sub_info->path) { + call_usermodehelper_freeinfo(sub_info); + return -EINVAL; + } helper_lock(); if (!khelper_wq || usermodehelper_disabled) { retval = -EBUSY; diff --git a/kernel/params.c b/kernel/params.c index 81c4e78c8f4c..c00d5b502aa4 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -254,11 +254,11 @@ int parse_args(const char *doing, STANDARD_PARAM_DEF(byte, unsigned char, "%hhu", unsigned long, kstrtoul); -STANDARD_PARAM_DEF(short, short, "%hi", long, kstrtoul); +STANDARD_PARAM_DEF(short, short, "%hi", long, kstrtol); STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", unsigned long, kstrtoul); -STANDARD_PARAM_DEF(int, int, "%i", long, kstrtoul); +STANDARD_PARAM_DEF(int, int, "%i", long, kstrtol); STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, kstrtoul); -STANDARD_PARAM_DEF(long, long, "%li", long, kstrtoul); +STANDARD_PARAM_DEF(long, long, "%li", long, kstrtol); STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, kstrtoul); int param_set_charp(const char *val, const struct kernel_param *kp) diff --git a/kernel/pid.c b/kernel/pid.c index ebe5e80b10f8..9b9a26698144 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -273,6 +273,11 @@ void free_pid(struct pid *pid) */ wake_up_process(ns->child_reaper); break; + case PIDNS_HASH_ADDING: + /* Handle a fork failure of the first process */ + WARN_ON(ns->child_reaper); + ns->nr_hashed = 0; + /* fall through */ case 0: schedule_work(&ns->proc_work); break; diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 358a146fd4da..98c3b34a4cff 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -743,7 +743,10 @@ int create_basic_memory_bitmaps(void) struct memory_bitmap *bm1, *bm2; int error = 0; - BUG_ON(forbidden_pages_map || free_pages_map); + if (forbidden_pages_map && free_pages_map) + return 0; + else + BUG_ON(forbidden_pages_map || free_pages_map); bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); if (!bm1) diff --git a/kernel/power/user.c b/kernel/power/user.c index 72e8f4fd616d..957f06164ad1 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -39,6 +39,7 @@ static struct snapshot_data { char frozen; char ready; char platform_support; + bool free_bitmaps; } snapshot_state; atomic_t snapshot_device_available = ATOMIC_INIT(1); @@ -82,6 +83,10 @@ static int snapshot_open(struct inode *inode, struct file *filp) data->swap = -1; data->mode = O_WRONLY; error = pm_notifier_call_chain(PM_RESTORE_PREPARE); + if (!error) { + error = create_basic_memory_bitmaps(); + data->free_bitmaps = !error; + } if (error) pm_notifier_call_chain(PM_POST_RESTORE); } @@ -111,6 +116,8 @@ static int snapshot_release(struct inode *inode, struct file *filp) pm_restore_gfp_mask(); free_basic_memory_bitmaps(); thaw_processes(); + } else if (data->free_bitmaps) { + free_basic_memory_bitmaps(); } pm_notifier_call_chain(data->mode == O_RDONLY ? PM_POST_HIBERNATION : PM_POST_RESTORE); @@ -231,6 +238,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, break; pm_restore_gfp_mask(); free_basic_memory_bitmaps(); + data->free_bitmaps = false; thaw_processes(); data->frozen = 0; break; diff --git a/kernel/reboot.c b/kernel/reboot.c index 269ed9384cc4..f813b3474646 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -32,7 +32,14 @@ EXPORT_SYMBOL(cad_pid); #endif enum reboot_mode reboot_mode DEFAULT_REBOOT_MODE; -int reboot_default; +/* + * This variable is used privately to keep track of whether or not + * reboot_type is still set to its default value (i.e., reboot= hasn't + * been set on the command line). This is needed so that we can + * suppress DMI scanning for reboot quirks. Without it, it's + * impossible to override a faulty reboot quirk without recompiling. + */ +int reboot_default = 1; int reboot_cpu; enum reboot_type reboot_type = BOOT_ACPI; int reboot_force; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 11cd13667359..7c70201fbc61 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4242,7 +4242,7 @@ static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq) } if (!se) { - cfs_rq->h_load = rq->avg.load_avg_contrib; + cfs_rq->h_load = cfs_rq->runnable_load_avg; cfs_rq->last_h_load_update = now; } @@ -4823,8 +4823,8 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) (busiest->load_per_task * SCHED_POWER_SCALE) / busiest->group_power; - if (busiest->avg_load - local->avg_load + scaled_busy_load_per_task >= - (scaled_busy_load_per_task * imbn)) { + if (busiest->avg_load + scaled_busy_load_per_task >= + local->avg_load + (scaled_busy_load_per_task * imbn)) { env->imbalance = busiest->load_per_task; return; } @@ -4896,7 +4896,8 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s * max load less than avg load(as we skip the groups at or below * its cpu_power, while calculating max_load..) */ - if (busiest->avg_load < sds->avg_load) { + if (busiest->avg_load <= sds->avg_load || + local->avg_load >= sds->avg_load) { env->imbalance = 0; return fix_small_imbalance(env, sds); } diff --git a/kernel/softirq.c b/kernel/softirq.c index 53cc09ceb0b8..d7d498d8cc4f 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -328,10 +328,19 @@ void irq_enter(void) static inline void invoke_softirq(void) { - if (!force_irqthreads) - __do_softirq(); - else + if (!force_irqthreads) { + /* + * We can safely execute softirq on the current stack if + * it is the irq stack, because it should be near empty + * at this stage. But we have no way to know if the arch + * calls irq_exit() on the irq stack. So call softirq + * in its own stack to prevent from any overrun on top + * of a potentially deep task stack. + */ + do_softirq(); + } else { wakeup_softirqd(); + } } static inline void tick_irq_exit(void) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 51c4f34d258e..4431610f049a 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -486,7 +486,52 @@ static struct smp_hotplug_thread watchdog_threads = { .unpark = watchdog_enable, }; -static int watchdog_enable_all_cpus(void) +static void restart_watchdog_hrtimer(void *info) +{ + struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); + int ret; + + /* + * No need to cancel and restart hrtimer if it is currently executing + * because it will reprogram itself with the new period now. + * We should never see it unqueued here because we are running per-cpu + * with interrupts disabled. + */ + ret = hrtimer_try_to_cancel(hrtimer); + if (ret == 1) + hrtimer_start(hrtimer, ns_to_ktime(sample_period), + HRTIMER_MODE_REL_PINNED); +} + +static void update_timers(int cpu) +{ + struct call_single_data data = {.func = restart_watchdog_hrtimer}; + /* + * Make sure that perf event counter will adopt to a new + * sampling period. Updating the sampling period directly would + * be much nicer but we do not have an API for that now so + * let's use a big hammer. + * Hrtimer will adopt the new period on the next tick but this + * might be late already so we have to restart the timer as well. + */ + watchdog_nmi_disable(cpu); + __smp_call_function_single(cpu, &data, 1); + watchdog_nmi_enable(cpu); +} + +static void update_timers_all_cpus(void) +{ + int cpu; + + get_online_cpus(); + preempt_disable(); + for_each_online_cpu(cpu) + update_timers(cpu); + preempt_enable(); + put_online_cpus(); +} + +static int watchdog_enable_all_cpus(bool sample_period_changed) { int err = 0; @@ -496,6 +541,8 @@ static int watchdog_enable_all_cpus(void) pr_err("Failed to create watchdog threads, disabled\n"); else watchdog_running = 1; + } else if (sample_period_changed) { + update_timers_all_cpus(); } return err; @@ -520,13 +567,15 @@ int proc_dowatchdog(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int err, old_thresh, old_enabled; + static DEFINE_MUTEX(watchdog_proc_mutex); + mutex_lock(&watchdog_proc_mutex); old_thresh = ACCESS_ONCE(watchdog_thresh); old_enabled = ACCESS_ONCE(watchdog_user_enabled); err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (err || !write) - return err; + goto out; set_sample_period(); /* @@ -535,7 +584,7 @@ int proc_dowatchdog(struct ctl_table *table, int write, * watchdog_*_all_cpus() function takes care of this. */ if (watchdog_user_enabled && watchdog_thresh) - err = watchdog_enable_all_cpus(); + err = watchdog_enable_all_cpus(old_thresh != watchdog_thresh); else watchdog_disable_all_cpus(); @@ -544,7 +593,8 @@ int proc_dowatchdog(struct ctl_table *table, int write, watchdog_thresh = old_thresh; watchdog_user_enabled = old_enabled; } - +out: + mutex_unlock(&watchdog_proc_mutex); return err; } #endif /* CONFIG_SYSCTL */ @@ -554,5 +604,5 @@ void __init lockup_detector_init(void) set_sample_period(); if (watchdog_user_enabled) - watchdog_enable_all_cpus(); + watchdog_enable_all_cpus(false); } diff --git a/lib/hexdump.c b/lib/hexdump.c index 3f0494c9d57a..8499c810909a 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -14,6 +14,8 @@ const char hex_asc[] = "0123456789abcdef"; EXPORT_SYMBOL(hex_asc); +const char hex_asc_upper[] = "0123456789ABCDEF"; +EXPORT_SYMBOL(hex_asc_upper); /** * hex_to_bin - convert a hex digit to its real value diff --git a/lib/kobject.c b/lib/kobject.c index 962175134702..084f7b18d0c0 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -592,7 +592,7 @@ static void kobject_release(struct kref *kref) { struct kobject *kobj = container_of(kref, struct kobject, kref); #ifdef CONFIG_DEBUG_KOBJECT_RELEASE - pr_debug("kobject: '%s' (%p): %s, parent %p (delayed)\n", + pr_info("kobject: '%s' (%p): %s, parent %p (delayed)\n", kobject_name(kobj), kobj, __func__, kobj->parent); INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup); schedule_delayed_work(&kobj->release, HZ); @@ -933,10 +933,7 @@ const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj) bool kobj_ns_current_may_mount(enum kobj_ns_type type) { - bool may_mount = false; - - if (type == KOBJ_NS_TYPE_NONE) - return true; + bool may_mount = true; spin_lock(&kobj_ns_type_lock); if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && diff --git a/lib/lockref.c b/lib/lockref.c index 677d036cf3c7..6f9d434c1521 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -4,6 +4,22 @@ #ifdef CONFIG_CMPXCHG_LOCKREF /* + * Allow weakly-ordered memory architectures to provide barrier-less + * cmpxchg semantics for lockref updates. + */ +#ifndef cmpxchg64_relaxed +# define cmpxchg64_relaxed cmpxchg64 +#endif + +/* + * Allow architectures to override the default cpu_relax() within CMPXCHG_LOOP. + * This is useful for architectures with an expensive cpu_relax(). + */ +#ifndef arch_mutex_cpu_relax +# define arch_mutex_cpu_relax() cpu_relax() +#endif + +/* * Note that the "cmpxchg()" reloads the "old" value for the * failure case. */ @@ -14,12 +30,13 @@ while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) { \ struct lockref new = old, prev = old; \ CODE \ - old.lock_count = cmpxchg64(&lockref->lock_count, \ - old.lock_count, new.lock_count); \ + old.lock_count = cmpxchg64_relaxed(&lockref->lock_count, \ + old.lock_count, \ + new.lock_count); \ if (likely(old.lock_count == prev.lock_count)) { \ SUCCESS; \ } \ - cpu_relax(); \ + arch_mutex_cpu_relax(); \ } \ } while (0) diff --git a/mm/Kconfig b/mm/Kconfig index 026771a9b097..394838f489eb 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -183,7 +183,7 @@ config MEMORY_HOTPLUG_SPARSE config MEMORY_HOTREMOVE bool "Allow for memory hot remove" select MEMORY_ISOLATION - select HAVE_BOOTMEM_INFO_NODE if X86_64 + select HAVE_BOOTMEM_INFO_NODE if (X86_64 || PPC64) depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE depends on MIGRATION diff --git a/mm/bounce.c b/mm/bounce.c index c9f0a4339a7d..5a7d58fb883b 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -204,6 +204,8 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, struct bio_vec *to, *from; unsigned i; + if (force) + goto bounce; bio_for_each_segment(from, *bio_orig, i) if (page_to_pfn(from->bv_page) > queue_bounce_pfn(q)) goto bounce; diff --git a/mm/compaction.c b/mm/compaction.c index c43789388cd8..b5326b141a25 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -677,6 +677,13 @@ static void isolate_freepages(struct zone *zone, pfn -= pageblock_nr_pages) { unsigned long isolated; + /* + * This can iterate a massively long zone without finding any + * suitable migration targets, so periodically check if we need + * to schedule. + */ + cond_resched(); + if (!pfn_valid(pfn)) continue; diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c index afc2daa91c60..4c84678371eb 100644 --- a/mm/hwpoison-inject.c +++ b/mm/hwpoison-inject.c @@ -20,8 +20,6 @@ static int hwpoison_inject(void *data, u64 val) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!hwpoison_filter_enable) - goto inject; if (!pfn_valid(pfn)) return -ENXIO; @@ -33,6 +31,9 @@ static int hwpoison_inject(void *data, u64 val) if (!get_page_unless_zero(hpage)) return 0; + if (!hwpoison_filter_enable) + goto inject; + if (!PageLRU(p) && !PageHuge(p)) shake_page(p, 0); /* diff --git a/mm/madvise.c b/mm/madvise.c index 6975bc812542..539eeb96b323 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -343,10 +343,11 @@ static long madvise_remove(struct vm_area_struct *vma, */ static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end) { + struct page *p; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - for (; start < end; start += PAGE_SIZE) { - struct page *p; + for (; start < end; start += PAGE_SIZE << + compound_order(compound_head(p))) { int ret; ret = get_user_pages_fast(start, 1, 0, &p); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d5ff3ce13029..1c52ddbc839b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -39,6 +39,7 @@ #include <linux/limits.h> #include <linux/export.h> #include <linux/mutex.h> +#include <linux/rbtree.h> #include <linux/slab.h> #include <linux/swap.h> #include <linux/swapops.h> @@ -160,6 +161,10 @@ struct mem_cgroup_per_zone { struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1]; + struct rb_node tree_node; /* RB tree node */ + unsigned long long usage_in_excess;/* Set to the value by which */ + /* the soft limit is exceeded*/ + bool on_tree; struct mem_cgroup *memcg; /* Back pointer, we cannot */ /* use container_of */ }; @@ -168,6 +173,26 @@ struct mem_cgroup_per_node { struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES]; }; +/* + * Cgroups above their limits are maintained in a RB-Tree, independent of + * their hierarchy representation + */ + +struct mem_cgroup_tree_per_zone { + struct rb_root rb_root; + spinlock_t lock; +}; + +struct mem_cgroup_tree_per_node { + struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES]; +}; + +struct mem_cgroup_tree { + struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES]; +}; + +static struct mem_cgroup_tree soft_limit_tree __read_mostly; + struct mem_cgroup_threshold { struct eventfd_ctx *eventfd; u64 threshold; @@ -303,22 +328,6 @@ struct mem_cgroup { atomic_t numainfo_events; atomic_t numainfo_updating; #endif - /* - * Protects soft_contributed transitions. - * See mem_cgroup_update_soft_limit - */ - spinlock_t soft_lock; - - /* - * If true then this group has increased parents' children_in_excess - * when it got over the soft limit. - * When a group falls bellow the soft limit, parents' children_in_excess - * is decreased and soft_contributed changed to false. - */ - bool soft_contributed; - - /* Number of children that are in soft limit excess */ - atomic_t children_in_excess; struct mem_cgroup_per_node *nodeinfo[0]; /* WARNING: nodeinfo must be the last member here */ @@ -422,6 +431,7 @@ static bool move_file(void) * limit reclaim to prevent infinite loops, if they ever occur. */ #define MEM_CGROUP_MAX_RECLAIM_LOOPS 100 +#define MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS 2 enum charge_type { MEM_CGROUP_CHARGE_TYPE_CACHE = 0, @@ -648,6 +658,164 @@ page_cgroup_zoneinfo(struct mem_cgroup *memcg, struct page *page) return mem_cgroup_zoneinfo(memcg, nid, zid); } +static struct mem_cgroup_tree_per_zone * +soft_limit_tree_node_zone(int nid, int zid) +{ + return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid]; +} + +static struct mem_cgroup_tree_per_zone * +soft_limit_tree_from_page(struct page *page) +{ + int nid = page_to_nid(page); + int zid = page_zonenum(page); + + return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid]; +} + +static void +__mem_cgroup_insert_exceeded(struct mem_cgroup *memcg, + struct mem_cgroup_per_zone *mz, + struct mem_cgroup_tree_per_zone *mctz, + unsigned long long new_usage_in_excess) +{ + struct rb_node **p = &mctz->rb_root.rb_node; + struct rb_node *parent = NULL; + struct mem_cgroup_per_zone *mz_node; + + if (mz->on_tree) + return; + + mz->usage_in_excess = new_usage_in_excess; + if (!mz->usage_in_excess) + return; + while (*p) { + parent = *p; + mz_node = rb_entry(parent, struct mem_cgroup_per_zone, + tree_node); + if (mz->usage_in_excess < mz_node->usage_in_excess) + p = &(*p)->rb_left; + /* + * We can't avoid mem cgroups that are over their soft + * limit by the same amount + */ + else if (mz->usage_in_excess >= mz_node->usage_in_excess) + p = &(*p)->rb_right; + } + rb_link_node(&mz->tree_node, parent, p); + rb_insert_color(&mz->tree_node, &mctz->rb_root); + mz->on_tree = true; +} + +static void +__mem_cgroup_remove_exceeded(struct mem_cgroup *memcg, + struct mem_cgroup_per_zone *mz, + struct mem_cgroup_tree_per_zone *mctz) +{ + if (!mz->on_tree) + return; + rb_erase(&mz->tree_node, &mctz->rb_root); + mz->on_tree = false; +} + +static void +mem_cgroup_remove_exceeded(struct mem_cgroup *memcg, + struct mem_cgroup_per_zone *mz, + struct mem_cgroup_tree_per_zone *mctz) +{ + spin_lock(&mctz->lock); + __mem_cgroup_remove_exceeded(memcg, mz, mctz); + spin_unlock(&mctz->lock); +} + + +static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) +{ + unsigned long long excess; + struct mem_cgroup_per_zone *mz; + struct mem_cgroup_tree_per_zone *mctz; + int nid = page_to_nid(page); + int zid = page_zonenum(page); + mctz = soft_limit_tree_from_page(page); + + /* + * Necessary to update all ancestors when hierarchy is used. + * because their event counter is not touched. + */ + for (; memcg; memcg = parent_mem_cgroup(memcg)) { + mz = mem_cgroup_zoneinfo(memcg, nid, zid); + excess = res_counter_soft_limit_excess(&memcg->res); + /* + * We have to update the tree if mz is on RB-tree or + * mem is over its softlimit. + */ + if (excess || mz->on_tree) { + spin_lock(&mctz->lock); + /* if on-tree, remove it */ + if (mz->on_tree) + __mem_cgroup_remove_exceeded(memcg, mz, mctz); + /* + * Insert again. mz->usage_in_excess will be updated. + * If excess is 0, no tree ops. + */ + __mem_cgroup_insert_exceeded(memcg, mz, mctz, excess); + spin_unlock(&mctz->lock); + } + } +} + +static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg) +{ + int node, zone; + struct mem_cgroup_per_zone *mz; + struct mem_cgroup_tree_per_zone *mctz; + + for_each_node(node) { + for (zone = 0; zone < MAX_NR_ZONES; zone++) { + mz = mem_cgroup_zoneinfo(memcg, node, zone); + mctz = soft_limit_tree_node_zone(node, zone); + mem_cgroup_remove_exceeded(memcg, mz, mctz); + } + } +} + +static struct mem_cgroup_per_zone * +__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) +{ + struct rb_node *rightmost = NULL; + struct mem_cgroup_per_zone *mz; + +retry: + mz = NULL; + rightmost = rb_last(&mctz->rb_root); + if (!rightmost) + goto done; /* Nothing to reclaim from */ + + mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node); + /* + * Remove the node now but someone else can add it back, + * we will to add it back at the end of reclaim to its correct + * position in the tree. + */ + __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz); + if (!res_counter_soft_limit_excess(&mz->memcg->res) || + !css_tryget(&mz->memcg->css)) + goto retry; +done: + return mz; +} + +static struct mem_cgroup_per_zone * +mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) +{ + struct mem_cgroup_per_zone *mz; + + spin_lock(&mctz->lock); + mz = __mem_cgroup_largest_soft_limit_node(mctz); + spin_unlock(&mctz->lock); + return mz; +} + /* * Implementation Note: reading percpu statistics for memcg. * @@ -822,48 +990,6 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg, } /* - * Called from rate-limited memcg_check_events when enough - * MEM_CGROUP_TARGET_SOFTLIMIT events are accumulated and it makes sure - * that all the parents up the hierarchy will be notified that this group - * is in excess or that it is not in excess anymore. mmecg->soft_contributed - * makes the transition a single action whenever the state flips from one to - * the other. - */ -static void mem_cgroup_update_soft_limit(struct mem_cgroup *memcg) -{ - unsigned long long excess = res_counter_soft_limit_excess(&memcg->res); - struct mem_cgroup *parent = memcg; - int delta = 0; - - spin_lock(&memcg->soft_lock); - if (excess) { - if (!memcg->soft_contributed) { - delta = 1; - memcg->soft_contributed = true; - } - } else { - if (memcg->soft_contributed) { - delta = -1; - memcg->soft_contributed = false; - } - } - - /* - * Necessary to update all ancestors when hierarchy is used - * because their event counter is not touched. - * We track children even outside the hierarchy for the root - * cgroup because tree walk starting at root should visit - * all cgroups and we want to prevent from pointless tree - * walk if no children is below the limit. - */ - while (delta && (parent = parent_mem_cgroup(parent))) - atomic_add(delta, &parent->children_in_excess); - if (memcg != root_mem_cgroup && !root_mem_cgroup->use_hierarchy) - atomic_add(delta, &root_mem_cgroup->children_in_excess); - spin_unlock(&memcg->soft_lock); -} - -/* * Check events in order. * */ @@ -886,7 +1012,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) mem_cgroup_threshold(memcg); if (unlikely(do_softlimit)) - mem_cgroup_update_soft_limit(memcg); + mem_cgroup_update_tree(memcg, page); #if MAX_NUMNODES > 1 if (unlikely(do_numainfo)) atomic_inc(&memcg->numainfo_events); @@ -929,15 +1055,6 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) return memcg; } -static enum mem_cgroup_filter_t -mem_cgroup_filter(struct mem_cgroup *memcg, struct mem_cgroup *root, - mem_cgroup_iter_filter cond) -{ - if (!cond) - return VISIT; - return cond(memcg, root); -} - /* * Returns a next (in a pre-order walk) alive memcg (with elevated css * ref. count) or NULL if the whole root's subtree has been visited. @@ -945,7 +1062,7 @@ mem_cgroup_filter(struct mem_cgroup *memcg, struct mem_cgroup *root, * helper function to be used by mem_cgroup_iter */ static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root, - struct mem_cgroup *last_visited, mem_cgroup_iter_filter cond) + struct mem_cgroup *last_visited) { struct cgroup_subsys_state *prev_css, *next_css; @@ -963,31 +1080,11 @@ skip_node: if (next_css) { struct mem_cgroup *mem = mem_cgroup_from_css(next_css); - switch (mem_cgroup_filter(mem, root, cond)) { - case SKIP: + if (css_tryget(&mem->css)) + return mem; + else { prev_css = next_css; goto skip_node; - case SKIP_TREE: - if (mem == root) - return NULL; - /* - * css_rightmost_descendant is not an optimal way to - * skip through a subtree (especially for imbalanced - * trees leaning to right) but that's what we have right - * now. More effective solution would be traversing - * right-up for first non-NULL without calling - * css_next_descendant_pre afterwards. - */ - prev_css = css_rightmost_descendant(next_css); - goto skip_node; - case VISIT: - if (css_tryget(&mem->css)) - return mem; - else { - prev_css = next_css; - goto skip_node; - } - break; } } @@ -1051,7 +1148,6 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter, * @root: hierarchy root * @prev: previously returned memcg, NULL on first invocation * @reclaim: cookie for shared reclaim walks, NULL for full walks - * @cond: filter for visited nodes, NULL for no filter * * Returns references to children of the hierarchy below @root, or * @root itself, or %NULL after a full round-trip. @@ -1064,18 +1160,15 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter, * divide up the memcgs in the hierarchy among all concurrent * reclaimers operating on the same zone and priority. */ -struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root, +struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, struct mem_cgroup *prev, - struct mem_cgroup_reclaim_cookie *reclaim, - mem_cgroup_iter_filter cond) + struct mem_cgroup_reclaim_cookie *reclaim) { struct mem_cgroup *memcg = NULL; struct mem_cgroup *last_visited = NULL; - if (mem_cgroup_disabled()) { - /* first call must return non-NULL, second return NULL */ - return (struct mem_cgroup *)(unsigned long)!prev; - } + if (mem_cgroup_disabled()) + return NULL; if (!root) root = root_mem_cgroup; @@ -1086,9 +1179,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root, if (!root->use_hierarchy && root != root_mem_cgroup) { if (prev) goto out_css_put; - if (mem_cgroup_filter(root, root, cond) == VISIT) - return root; - return NULL; + return root; } rcu_read_lock(); @@ -1111,7 +1202,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root, last_visited = mem_cgroup_iter_load(iter, root, &seq); } - memcg = __mem_cgroup_iter_next(root, last_visited, cond); + memcg = __mem_cgroup_iter_next(root, last_visited); if (reclaim) { mem_cgroup_iter_update(iter, last_visited, memcg, seq); @@ -1122,11 +1213,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root, reclaim->generation = iter->generation; } - /* - * We have finished the whole tree walk or no group has been - * visited because filter told us to skip the root node. - */ - if (!memcg && (prev || (cond && !last_visited))) + if (prev && !memcg) goto out_unlock; } out_unlock: @@ -1767,7 +1854,6 @@ static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg, return total; } -#if MAX_NUMNODES > 1 /** * test_mem_cgroup_node_reclaimable * @memcg: the target memcg @@ -1790,6 +1876,7 @@ static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg, return false; } +#if MAX_NUMNODES > 1 /* * Always updating the nodemask is not very good - even if we have an empty @@ -1857,50 +1944,104 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg) return node; } +/* + * Check all nodes whether it contains reclaimable pages or not. + * For quick scan, we make use of scan_nodes. This will allow us to skip + * unused nodes. But scan_nodes is lazily updated and may not cotain + * enough new information. We need to do double check. + */ +static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap) +{ + int nid; + + /* + * quick check...making use of scan_node. + * We can skip unused nodes. + */ + if (!nodes_empty(memcg->scan_nodes)) { + for (nid = first_node(memcg->scan_nodes); + nid < MAX_NUMNODES; + nid = next_node(nid, memcg->scan_nodes)) { + + if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap)) + return true; + } + } + /* + * Check rest of nodes. + */ + for_each_node_state(nid, N_MEMORY) { + if (node_isset(nid, memcg->scan_nodes)) + continue; + if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap)) + return true; + } + return false; +} + #else int mem_cgroup_select_victim_node(struct mem_cgroup *memcg) { return 0; } -#endif - -/* - * A group is eligible for the soft limit reclaim under the given root - * hierarchy if - * a) it is over its soft limit - * b) any parent up the hierarchy is over its soft limit - * - * If the given group doesn't have any children over the limit then it - * doesn't make any sense to iterate its subtree. - */ -enum mem_cgroup_filter_t -mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg, - struct mem_cgroup *root) +static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap) { - struct mem_cgroup *parent; - - if (!memcg) - memcg = root_mem_cgroup; - parent = memcg; - - if (res_counter_soft_limit_excess(&memcg->res)) - return VISIT; + return test_mem_cgroup_node_reclaimable(memcg, 0, noswap); +} +#endif - /* - * If any parent up to the root in the hierarchy is over its soft limit - * then we have to obey and reclaim from this group as well. - */ - while ((parent = parent_mem_cgroup(parent))) { - if (res_counter_soft_limit_excess(&parent->res)) - return VISIT; - if (parent == root) +static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg, + struct zone *zone, + gfp_t gfp_mask, + unsigned long *total_scanned) +{ + struct mem_cgroup *victim = NULL; + int total = 0; + int loop = 0; + unsigned long excess; + unsigned long nr_scanned; + struct mem_cgroup_reclaim_cookie reclaim = { + .zone = zone, + .priority = 0, + }; + + excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT; + + while (1) { + victim = mem_cgroup_iter(root_memcg, victim, &reclaim); + if (!victim) { + loop++; + if (loop >= 2) { + /* + * If we have not been able to reclaim + * anything, it might because there are + * no reclaimable pages under this hierarchy + */ + if (!total) + break; + /* + * We want to do more targeted reclaim. + * excess >> 2 is not to excessive so as to + * reclaim too much, nor too less that we keep + * coming back to reclaim from this cgroup + */ + if (total >= (excess >> 2) || + (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS)) + break; + } + continue; + } + if (!mem_cgroup_reclaimable(victim, false)) + continue; + total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false, + zone, &nr_scanned); + *total_scanned += nr_scanned; + if (!res_counter_soft_limit_excess(&root_memcg->res)) break; } - - if (!atomic_read(&memcg->children_in_excess)) - return SKIP_TREE; - return SKIP; + mem_cgroup_iter_break(root_memcg, victim); + return total; } static DEFINE_SPINLOCK(memcg_oom_lock); @@ -2812,7 +2953,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, unlock_page_cgroup(pc); /* - * "charge_statistics" updated event counter. + * "charge_statistics" updated event counter. Then, check it. + * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree. + * if they exceeds softlimit. */ memcg_check_events(memcg, page); } @@ -4647,6 +4790,98 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, return ret; } +unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, + gfp_t gfp_mask, + unsigned long *total_scanned) +{ + unsigned long nr_reclaimed = 0; + struct mem_cgroup_per_zone *mz, *next_mz = NULL; + unsigned long reclaimed; + int loop = 0; + struct mem_cgroup_tree_per_zone *mctz; + unsigned long long excess; + unsigned long nr_scanned; + + if (order > 0) + return 0; + + mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone)); + /* + * This loop can run a while, specially if mem_cgroup's continuously + * keep exceeding their soft limit and putting the system under + * pressure + */ + do { + if (next_mz) + mz = next_mz; + else + mz = mem_cgroup_largest_soft_limit_node(mctz); + if (!mz) + break; + + nr_scanned = 0; + reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone, + gfp_mask, &nr_scanned); + nr_reclaimed += reclaimed; + *total_scanned += nr_scanned; + spin_lock(&mctz->lock); + + /* + * If we failed to reclaim anything from this memory cgroup + * it is time to move on to the next cgroup + */ + next_mz = NULL; + if (!reclaimed) { + do { + /* + * Loop until we find yet another one. + * + * By the time we get the soft_limit lock + * again, someone might have aded the + * group back on the RB tree. Iterate to + * make sure we get a different mem. + * mem_cgroup_largest_soft_limit_node returns + * NULL if no other cgroup is present on + * the tree + */ + next_mz = + __mem_cgroup_largest_soft_limit_node(mctz); + if (next_mz == mz) + css_put(&next_mz->memcg->css); + else /* next_mz == NULL or other memcg */ + break; + } while (1); + } + __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz); + excess = res_counter_soft_limit_excess(&mz->memcg->res); + /* + * One school of thought says that we should not add + * back the node to the tree if reclaim returns 0. + * But our reclaim could return 0, simply because due + * to priority we are exposing a smaller subset of + * memory to reclaim from. Consider this as a longer + * term TODO. + */ + /* If excess == 0, no tree ops */ + __mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess); + spin_unlock(&mctz->lock); + css_put(&mz->memcg->css); + loop++; + /* + * Could not reclaim anything and there are no more + * mem cgroups to try or we seem to be looping without + * reclaiming anything. + */ + if (!nr_reclaimed && + (next_mz == NULL || + loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS)) + break; + } while (!nr_reclaimed); + if (next_mz) + css_put(&next_mz->memcg->css); + return nr_reclaimed; +} + /** * mem_cgroup_force_empty_list - clears LRU of a group * @memcg: group to clear @@ -5911,6 +6146,8 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) for (zone = 0; zone < MAX_NR_ZONES; zone++) { mz = &pn->zoneinfo[zone]; lruvec_init(&mz->lruvec); + mz->usage_in_excess = 0; + mz->on_tree = false; mz->memcg = memcg; } memcg->nodeinfo[node] = pn; @@ -5966,6 +6203,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) int node; size_t size = memcg_size(); + mem_cgroup_remove_from_trees(memcg); free_css_id(&mem_cgroup_subsys, &memcg->css); for_each_node(node) @@ -6002,6 +6240,29 @@ struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) } EXPORT_SYMBOL(parent_mem_cgroup); +static void __init mem_cgroup_soft_limit_tree_init(void) +{ + struct mem_cgroup_tree_per_node *rtpn; + struct mem_cgroup_tree_per_zone *rtpz; + int tmp, node, zone; + + for_each_node(node) { + tmp = node; + if (!node_state(node, N_NORMAL_MEMORY)) + tmp = -1; + rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp); + BUG_ON(!rtpn); + + soft_limit_tree.rb_tree_per_node[node] = rtpn; + + for (zone = 0; zone < MAX_NR_ZONES; zone++) { + rtpz = &rtpn->rb_tree_per_zone[zone]; + rtpz->rb_root = RB_ROOT; + spin_lock_init(&rtpz->lock); + } + } +} + static struct cgroup_subsys_state * __ref mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) { @@ -6031,7 +6292,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) mutex_init(&memcg->thresholds_lock); spin_lock_init(&memcg->move_lock); vmpressure_init(&memcg->vmpressure); - spin_lock_init(&memcg->soft_lock); return &memcg->css; @@ -6109,13 +6369,6 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) mem_cgroup_invalidate_reclaim_iterators(memcg); mem_cgroup_reparent_charges(memcg); - if (memcg->soft_contributed) { - while ((memcg = parent_mem_cgroup(memcg))) - atomic_dec(&memcg->children_in_excess); - - if (memcg != root_mem_cgroup && !root_mem_cgroup->use_hierarchy) - atomic_dec(&root_mem_cgroup->children_in_excess); - } mem_cgroup_destroy_all_caches(memcg); vmpressure_cleanup(&memcg->vmpressure); } @@ -6790,6 +7043,7 @@ static int __init mem_cgroup_init(void) { hotcpu_notifier(memcg_cpu_hotplug_callback, 0); enable_swap_cgroup(); + mem_cgroup_soft_limit_tree_init(); memcg_stock_init(); return 0; } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 947ed5413279..bf3351b5115e 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1114,8 +1114,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags) * shake_page could have turned it free. */ if (is_free_buddy_page(p)) { - action_result(pfn, "free buddy, 2nd try", - DELAYED); + if (flags & MF_COUNT_INCREASED) + action_result(pfn, "free buddy", DELAYED); + else + action_result(pfn, "free buddy, 2nd try", DELAYED); return 0; } action_result(pfn, "non LRU", IGNORED); @@ -1349,7 +1351,7 @@ int unpoison_memory(unsigned long pfn) * worked by memory_failure() and the page lock is not held yet. * In such case, we yield to memory_failure() and make unpoison fail. */ - if (PageTransHuge(page)) { + if (!PageHuge(page) && PageTransHuge(page)) { pr_info("MCE: Memory failure is now running on %#lx\n", pfn); return 0; } diff --git a/mm/migrate.c b/mm/migrate.c index 9c8d5f59d30b..a26bccd44ccb 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -107,7 +107,7 @@ void putback_movable_pages(struct list_head *l) list_del(&page->lru); dec_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); - if (unlikely(balloon_page_movable(page))) + if (unlikely(isolated_balloon_page(page))) balloon_page_putback(page); else putback_lru_page(page); diff --git a/mm/mlock.c b/mm/mlock.c index d63802663242..d480cd6fc475 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -379,10 +379,14 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec, /* * Initialize pte walk starting at the already pinned page where we - * are sure that there is a pte. + * are sure that there is a pte, as it was pinned under the same + * mmap_sem write op. */ pte = get_locked_pte(vma->vm_mm, start, &ptl); - end = min(end, pmd_addr_end(start, end)); + /* Make sure we do not cross the page table boundary */ + end = pgd_addr_end(start, end); + end = pud_addr_end(start, end); + end = pmd_addr_end(start, end); /* The page next to the pinned page is the first we will try to get */ start += PAGE_SIZE; @@ -736,6 +740,7 @@ static int do_mlockall(int flags) /* Ignore errors */ mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags); + cond_resched(); } out: return 0; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0ee638f76ebe..dd886fac451a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6366,10 +6366,6 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) list_del(&page->lru); rmv_page_order(page); zone->free_area[order].nr_free--; -#ifdef CONFIG_HIGHMEM - if (PageHighMem(page)) - totalhigh_pages -= 1 << order; -#endif for (i = 0; i < (1 << order); i++) SetPageReserved((page+i)); pfn += (1 << order); diff --git a/mm/slab_common.c b/mm/slab_common.c index a3443278ce3a..e2e98af703ea 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -56,6 +56,7 @@ static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name, continue; } +#if !defined(CONFIG_SLUB) || !defined(CONFIG_SLUB_DEBUG_ON) /* * For simplicity, we won't check this in the list of memcg * caches. We have control over memcg naming, and if there @@ -69,6 +70,7 @@ static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name, s = NULL; return -EINVAL; } +#endif } WARN_ON(strchr(name, ' ')); /* It confuses parsers */ diff --git a/mm/vmscan.c b/mm/vmscan.c index 8ed1b775bdc9..53f2f82f83ae 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -48,6 +48,7 @@ #include <asm/div64.h> #include <linux/swapops.h> +#include <linux/balloon_compaction.h> #include "internal.h" @@ -139,23 +140,11 @@ static bool global_reclaim(struct scan_control *sc) { return !sc->target_mem_cgroup; } - -static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc) -{ - struct mem_cgroup *root = sc->target_mem_cgroup; - return !mem_cgroup_disabled() && - mem_cgroup_soft_reclaim_eligible(root, root) != SKIP_TREE; -} #else static bool global_reclaim(struct scan_control *sc) { return true; } - -static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc) -{ - return false; -} #endif unsigned long zone_reclaimable_pages(struct zone *zone) @@ -1125,7 +1114,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, LIST_HEAD(clean_pages); list_for_each_entry_safe(page, next, page_list, lru) { - if (page_is_file_cache(page) && !PageDirty(page)) { + if (page_is_file_cache(page) && !PageDirty(page) && + !isolated_balloon_page(page)) { ClearPageActive(page); list_move(&page->lru, &clean_pages); } @@ -2176,11 +2166,9 @@ static inline bool should_continue_reclaim(struct zone *zone, } } -static int -__shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim) +static void shrink_zone(struct zone *zone, struct scan_control *sc) { unsigned long nr_reclaimed, nr_scanned; - int groups_scanned = 0; do { struct mem_cgroup *root = sc->target_mem_cgroup; @@ -2188,17 +2176,15 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim) .zone = zone, .priority = sc->priority, }; - struct mem_cgroup *memcg = NULL; - mem_cgroup_iter_filter filter = (soft_reclaim) ? - mem_cgroup_soft_reclaim_eligible : NULL; + struct mem_cgroup *memcg; nr_reclaimed = sc->nr_reclaimed; nr_scanned = sc->nr_scanned; - while ((memcg = mem_cgroup_iter_cond(root, memcg, &reclaim, filter))) { + memcg = mem_cgroup_iter(root, NULL, &reclaim); + do { struct lruvec *lruvec; - groups_scanned++; lruvec = mem_cgroup_zone_lruvec(zone, memcg); shrink_lruvec(lruvec, sc); @@ -2218,7 +2204,8 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim) mem_cgroup_iter_break(root, memcg); break; } - } + memcg = mem_cgroup_iter(root, memcg, &reclaim); + } while (memcg); vmpressure(sc->gfp_mask, sc->target_mem_cgroup, sc->nr_scanned - nr_scanned, @@ -2226,37 +2213,6 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim) } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed, sc->nr_scanned - nr_scanned, sc)); - - return groups_scanned; -} - - -static void shrink_zone(struct zone *zone, struct scan_control *sc) -{ - bool do_soft_reclaim = mem_cgroup_should_soft_reclaim(sc); - unsigned long nr_scanned = sc->nr_scanned; - int scanned_groups; - - scanned_groups = __shrink_zone(zone, sc, do_soft_reclaim); - /* - * memcg iterator might race with other reclaimer or start from - * a incomplete tree walk so the tree walk in __shrink_zone - * might have missed groups that are above the soft limit. Try - * another loop to catch up with others. Do it just once to - * prevent from reclaim latencies when other reclaimers always - * preempt this one. - */ - if (do_soft_reclaim && !scanned_groups) - __shrink_zone(zone, sc, do_soft_reclaim); - - /* - * No group is over the soft limit or those that are do not have - * pages in the zone we are reclaiming so we have to reclaim everybody - */ - if (do_soft_reclaim && (sc->nr_scanned == nr_scanned)) { - __shrink_zone(zone, sc, false); - return; - } } /* Returns true if compaction should go ahead for a high-order request */ @@ -2320,6 +2276,8 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) { struct zoneref *z; struct zone *zone; + unsigned long nr_soft_reclaimed; + unsigned long nr_soft_scanned; bool aborted_reclaim = false; /* @@ -2359,6 +2317,18 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) continue; } } + /* + * This steals pages from memory cgroups over softlimit + * and returns the number of reclaimed pages and + * scanned pages. This works for global memory pressure + * and balancing, not for a memcg's limit. + */ + nr_soft_scanned = 0; + nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, + sc->order, sc->gfp_mask, + &nr_soft_scanned); + sc->nr_reclaimed += nr_soft_reclaimed; + sc->nr_scanned += nr_soft_scanned; /* need some check for avoid more shrink_zone() */ } @@ -2952,6 +2922,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, { int i; int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ + unsigned long nr_soft_reclaimed; + unsigned long nr_soft_scanned; struct scan_control sc = { .gfp_mask = GFP_KERNEL, .priority = DEF_PRIORITY, @@ -3066,6 +3038,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, sc.nr_scanned = 0; + nr_soft_scanned = 0; + /* + * Call soft limit reclaim before calling shrink_zone. + */ + nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, + order, sc.gfp_mask, + &nr_soft_scanned); + sc.nr_reclaimed += nr_soft_reclaimed; + /* * There should be no need to raise the scanning * priority if enough pages are already being scanned diff --git a/net/802/mrp.c b/net/802/mrp.c index 1eb05d80b07b..3ed616215870 100644 --- a/net/802/mrp.c +++ b/net/802/mrp.c @@ -24,6 +24,11 @@ static unsigned int mrp_join_time __read_mostly = 200; module_param(mrp_join_time, uint, 0644); MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)"); + +static unsigned int mrp_periodic_time __read_mostly = 1000; +module_param(mrp_periodic_time, uint, 0644); +MODULE_PARM_DESC(mrp_periodic_time, "Periodic time in ms (default 1s)"); + MODULE_LICENSE("GPL"); static const u8 @@ -595,6 +600,24 @@ static void mrp_join_timer(unsigned long data) mrp_join_timer_arm(app); } +static void mrp_periodic_timer_arm(struct mrp_applicant *app) +{ + mod_timer(&app->periodic_timer, + jiffies + msecs_to_jiffies(mrp_periodic_time)); +} + +static void mrp_periodic_timer(unsigned long data) +{ + struct mrp_applicant *app = (struct mrp_applicant *)data; + + spin_lock(&app->lock); + mrp_mad_event(app, MRP_EVENT_PERIODIC); + mrp_pdu_queue(app); + spin_unlock(&app->lock); + + mrp_periodic_timer_arm(app); +} + static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset) { __be16 endmark; @@ -845,6 +868,9 @@ int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl) rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app); setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app); mrp_join_timer_arm(app); + setup_timer(&app->periodic_timer, mrp_periodic_timer, + (unsigned long)app); + mrp_periodic_timer_arm(app); return 0; err3: @@ -870,6 +896,7 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl) * all pending messages before the applicant is gone. */ del_timer_sync(&app->join_timer); + del_timer_sync(&app->periodic_timer); spin_lock_bh(&app->lock); mrp_mad_event(app, MRP_EVENT_TX); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 634debab4d54..fb7356fcfe51 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1146,7 +1146,11 @@ int hci_dev_open(__u16 dev) goto done; } - if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) { + /* Check for rfkill but allow the HCI setup stage to proceed + * (which in itself doesn't cause any RF activity). + */ + if (test_bit(HCI_RFKILLED, &hdev->dev_flags) && + !test_bit(HCI_SETUP, &hdev->dev_flags)) { ret = -ERFKILL; goto done; } @@ -1566,10 +1570,13 @@ static int hci_rfkill_set_block(void *data, bool blocked) BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked); - if (!blocked) - return 0; - - hci_dev_do_close(hdev); + if (blocked) { + set_bit(HCI_RFKILLED, &hdev->dev_flags); + if (!test_bit(HCI_SETUP, &hdev->dev_flags)) + hci_dev_do_close(hdev); + } else { + clear_bit(HCI_RFKILLED, &hdev->dev_flags); + } return 0; } @@ -1591,9 +1598,13 @@ static void hci_power_on(struct work_struct *work) return; } - if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) + if (test_bit(HCI_RFKILLED, &hdev->dev_flags)) { + clear_bit(HCI_AUTO_OFF, &hdev->dev_flags); + hci_dev_do_close(hdev); + } else if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { queue_delayed_work(hdev->req_workqueue, &hdev->power_off, HCI_AUTO_OFF_TIMEOUT); + } if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags)) mgmt_index_added(hdev); @@ -2209,6 +2220,9 @@ int hci_register_dev(struct hci_dev *hdev) } } + if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) + set_bit(HCI_RFKILLED, &hdev->dev_flags); + set_bit(HCI_SETUP, &hdev->dev_flags); if (hdev->dev_type != HCI_AMP) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 94aab73f89d4..8db3e89fae35 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3557,7 +3557,11 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb) cp.handle = cpu_to_le16(conn->handle); if (ltk->authenticated) - conn->sec_level = BT_SECURITY_HIGH; + conn->pending_sec_level = BT_SECURITY_HIGH; + else + conn->pending_sec_level = BT_SECURITY_MEDIUM; + + conn->enc_key_size = ltk->enc_size; hci_send_cmd(hdev, HCI_OP_LE_LTK_REPLY, sizeof(cp), &cp); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index b3bb7bca8e60..63fa11109a1c 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3755,6 +3755,13 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, sk = chan->sk; + /* For certain devices (ex: HID mouse), support for authentication, + * pairing and bonding is optional. For such devices, inorder to avoid + * the ACL alive for too long after L2CAP disconnection, reset the ACL + * disc_timeout back to HCI_DISCONN_TIMEOUT during L2CAP connect. + */ + conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT; + bacpy(&bt_sk(sk)->src, conn->src); bacpy(&bt_sk(sk)->dst, conn->dst); chan->psm = psm; diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 6d126faf145f..84fcf9fff3ea 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -569,7 +569,6 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb) static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err) { struct rfcomm_dev *dev = dlc->owner; - struct tty_struct *tty; if (!dev) return; @@ -581,38 +580,8 @@ static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err) DPM_ORDER_DEV_AFTER_PARENT); wake_up_interruptible(&dev->port.open_wait); - } else if (dlc->state == BT_CLOSED) { - tty = tty_port_tty_get(&dev->port); - if (!tty) { - if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) { - /* Drop DLC lock here to avoid deadlock - * 1. rfcomm_dev_get will take rfcomm_dev_lock - * but in rfcomm_dev_add there's lock order: - * rfcomm_dev_lock -> dlc lock - * 2. tty_port_put will deadlock if it's - * the last reference - * - * FIXME: when we release the lock anything - * could happen to dev, even its destruction - */ - rfcomm_dlc_unlock(dlc); - if (rfcomm_dev_get(dev->id) == NULL) { - rfcomm_dlc_lock(dlc); - return; - } - - if (!test_and_set_bit(RFCOMM_TTY_RELEASED, - &dev->flags)) - tty_port_put(&dev->port); - - tty_port_put(&dev->port); - rfcomm_dlc_lock(dlc); - } - } else { - tty_hangup(tty); - tty_kref_put(tty); - } - } + } else if (dlc->state == BT_CLOSED) + tty_port_tty_hangup(&dev->port, false); } static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig) diff --git a/net/core/dev.c b/net/core/dev.c index 5c713f2239cc..65f829cfd928 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5247,10 +5247,12 @@ static int dev_new_index(struct net *net) /* Delayed registration/unregisteration */ static LIST_HEAD(net_todo_list); +static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); static void net_set_todo(struct net_device *dev) { list_add_tail(&dev->todo_list, &net_todo_list); + dev_net(dev)->dev_unreg_count++; } static void rollback_registered_many(struct list_head *head) @@ -5918,6 +5920,12 @@ void netdev_run_todo(void) if (dev->destructor) dev->destructor(dev); + /* Report a network device has been unregistered */ + rtnl_lock(); + dev_net(dev)->dev_unreg_count--; + __rtnl_unlock(); + wake_up(&netdev_unregistering_wq); + /* Free network device */ kobject_put(&dev->dev.kobj); } @@ -6603,6 +6611,34 @@ static void __net_exit default_device_exit(struct net *net) rtnl_unlock(); } +static void __net_exit rtnl_lock_unregistering(struct list_head *net_list) +{ + /* Return with the rtnl_lock held when there are no network + * devices unregistering in any network namespace in net_list. + */ + struct net *net; + bool unregistering; + DEFINE_WAIT(wait); + + for (;;) { + prepare_to_wait(&netdev_unregistering_wq, &wait, + TASK_UNINTERRUPTIBLE); + unregistering = false; + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) { + if (net->dev_unreg_count > 0) { + unregistering = true; + break; + } + } + if (!unregistering) + break; + __rtnl_unlock(); + schedule(); + } + finish_wait(&netdev_unregistering_wq, &wait); +} + static void __net_exit default_device_exit_batch(struct list_head *net_list) { /* At exit all network devices most be removed from a network @@ -6614,7 +6650,18 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) struct net *net; LIST_HEAD(dev_kill_list); - rtnl_lock(); + /* To prevent network device cleanup code from dereferencing + * loopback devices or network devices that have been freed + * wait here for all pending unregistrations to complete, + * before unregistring the loopback device and allowing the + * network namespace be freed. + * + * The netdev todo list containing all network devices + * unregistrations that happen in default_device_exit_batch + * will run in the rtnl_unlock() at the end of + * default_device_exit_batch. + */ + rtnl_lock_unregistering(net_list); list_for_each_entry(net, net_list, exit_list) { for_each_netdev_reverse(net, dev) { if (dev->rtnl_link_ops) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 1929af87b260..8d7d0dd72db2 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -154,8 +154,8 @@ ipv6: if (poff >= 0) { __be32 *ports, _ports; - nhoff += poff; - ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports); + ports = skb_header_pointer(skb, nhoff + poff, + sizeof(_ports), &_ports); if (ports) flow->ports = *ports; } diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 6a2f13cee86a..3f1ec1586ae1 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -10,11 +10,24 @@ #include <net/secure_seq.h> -static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; +#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4) -void net_secret_init(void) +static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned; + +static void net_secret_init(void) { - get_random_bytes(net_secret, sizeof(net_secret)); + u32 tmp; + int i; + + if (likely(net_secret[0])) + return; + + for (i = NET_SECRET_SIZE; i > 0;) { + do { + get_random_bytes(&tmp, sizeof(tmp)); + } while (!tmp); + cmpxchg(&net_secret[--i], 0, tmp); + } } #ifdef CONFIG_INET @@ -42,6 +55,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, u32 hash[MD5_DIGEST_WORDS]; u32 i; + net_secret_init(); memcpy(hash, saddr, 16); for (i = 0; i < 4; i++) secret[i] = net_secret[i] + (__force u32)daddr[i]; @@ -63,6 +77,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, u32 hash[MD5_DIGEST_WORDS]; u32 i; + net_secret_init(); memcpy(hash, saddr, 16); for (i = 0; i < 4; i++) secret[i] = net_secret[i] + (__force u32) daddr[i]; @@ -82,6 +97,7 @@ __u32 secure_ip_id(__be32 daddr) { u32 hash[MD5_DIGEST_WORDS]; + net_secret_init(); hash[0] = (__force __u32) daddr; hash[1] = net_secret[13]; hash[2] = net_secret[14]; @@ -96,6 +112,7 @@ __u32 secure_ipv6_id(const __be32 daddr[4]) { __u32 hash[4]; + net_secret_init(); memcpy(hash, daddr, 16); md5_transform(hash, net_secret); @@ -107,6 +124,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, { u32 hash[MD5_DIGEST_WORDS]; + net_secret_init(); hash[0] = (__force u32)saddr; hash[1] = (__force u32)daddr; hash[2] = ((__force u16)sport << 16) + (__force u16)dport; @@ -121,6 +139,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) { u32 hash[MD5_DIGEST_WORDS]; + net_secret_init(); hash[0] = (__force u32)saddr; hash[1] = (__force u32)daddr; hash[2] = (__force u32)dport ^ net_secret[14]; @@ -140,6 +159,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, u32 hash[MD5_DIGEST_WORDS]; u64 seq; + net_secret_init(); hash[0] = (__force u32)saddr; hash[1] = (__force u32)daddr; hash[2] = ((__force u16)sport << 16) + (__force u16)dport; @@ -164,6 +184,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, u64 seq; u32 i; + net_secret_init(); memcpy(hash, saddr, 16); for (i = 0; i < 4; i++) secret[i] = net_secret[i] + daddr[i]; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 7a1874b7b8fd..cfeb85cff4f0 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -263,10 +263,8 @@ void build_ehash_secret(void) get_random_bytes(&rnd, sizeof(rnd)); } while (rnd == 0); - if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) { + if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); - net_secret_init(); - } } EXPORT_SYMBOL(build_ehash_secret); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index dace87f06e5f..7defdc9ba167 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -736,7 +736,7 @@ static void igmp_gq_timer_expire(unsigned long data) in_dev->mr_gq_running = 0; igmpv3_send_report(in_dev, NULL); - __in_dev_put(in_dev); + in_dev_put(in_dev); } static void igmp_ifc_timer_expire(unsigned long data) @@ -749,7 +749,7 @@ static void igmp_ifc_timer_expire(unsigned long data) igmp_ifc_start_timer(in_dev, unsolicited_report_interval(in_dev)); } - __in_dev_put(in_dev); + in_dev_put(in_dev); } static void igmp_ifc_event(struct in_device *in_dev) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index ac9fabe0300f..63a6d6d6b875 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -623,6 +623,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, tunnel->err_count = 0; } + tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); ttl = tnl_params->ttl; if (ttl == 0) { if (skb->protocol == htons(ETH_P_IP)) @@ -641,18 +642,17 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + rt->dst.header_len; - if (max_headroom > dev->needed_headroom) { + if (max_headroom > dev->needed_headroom) dev->needed_headroom = max_headroom; - if (skb_cow_head(skb, dev->needed_headroom)) { - dev->stats.tx_dropped++; - dev_kfree_skb(skb); - return; - } + + if (skb_cow_head(skb, dev->needed_headroom)) { + dev->stats.tx_dropped++; + dev_kfree_skb(skb); + return; } err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol, - ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df, - !net_eq(tunnel->net, dev_net(dev))); + tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); return; @@ -853,8 +853,10 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, /* FB netdevice is special: we have one, and only one per netns. * Allowing to move it to another netns is clearly unsafe. */ - if (!IS_ERR(itn->fb_tunnel_dev)) + if (!IS_ERR(itn->fb_tunnel_dev)) { itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; + ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev)); + } rtnl_unlock(); return PTR_RET(itn->fb_tunnel_dev); @@ -884,8 +886,6 @@ static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head, if (!net_eq(dev_net(t->dev), net)) unregister_netdevice_queue(t->dev, head); } - if (itn->fb_tunnel_dev) - unregister_netdevice_queue(itn->fb_tunnel_dev, head); } void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index d6c856b17fd4..c31e3ad98ef2 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -61,7 +61,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb, memset(IPCB(skb), 0, sizeof(*IPCB(skb))); /* Push down and install the IP header. */ - __skb_push(skb, sizeof(struct iphdr)); + skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); iph = ip_hdr(skb); diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 67e17dcda65e..b6346bf2fde3 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -267,7 +267,8 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) if (th == NULL) return NF_DROP; - synproxy_parse_options(skb, par->thoff, th, &opts); + if (!synproxy_parse_options(skb, par->thoff, th, &opts)) + return NF_DROP; if (th->syn && !(th->ack || th->fin || th->rst)) { /* Initial SYN from client */ @@ -350,7 +351,8 @@ static unsigned int ipv4_synproxy_hook(unsigned int hooknum, /* fall through */ case TCP_CONNTRACK_SYN_SENT: - synproxy_parse_options(skb, thoff, th, &opts); + if (!synproxy_parse_options(skb, thoff, th, &opts)) + return NF_DROP; if (!th->syn && th->ack && CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { @@ -373,7 +375,9 @@ static unsigned int ipv4_synproxy_hook(unsigned int hooknum, if (!th->syn || !th->ack) break; - synproxy_parse_options(skb, thoff, th, &opts); + if (!synproxy_parse_options(skb, thoff, th, &opts)) + return NF_DROP; + if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy->tsoff = opts.tsval - synproxy->its; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index bfec521c717f..193db03540ad 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -218,8 +218,10 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) ipv4_sk_update_pmtu(skb, sk, info); - else if (type == ICMP_REDIRECT) + else if (type == ICMP_REDIRECT) { ipv4_sk_redirect(skb, sk); + return; + } /* Report error on raw socket, if: 1. User requested ip_recverr. diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7c83cb8bf137..e6bb8256e59f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -895,8 +895,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_orphan(skb); skb->sk = sk; - skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ? - tcp_wfree : sock_wfree; + skb->destructor = tcp_wfree; atomic_add(skb->truesize, &sk->sk_wmem_alloc); /* Build TCP header and checksum it. */ @@ -1840,7 +1839,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, while ((skb = tcp_send_head(sk))) { unsigned int limit; - tso_segs = tcp_init_tso_segs(sk, skb, mss_now); BUG_ON(!tso_segs); @@ -1869,13 +1867,20 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, break; } - /* TSQ : sk_wmem_alloc accounts skb truesize, - * including skb overhead. But thats OK. + /* TCP Small Queues : + * Control number of packets in qdisc/devices to two packets / or ~1 ms. + * This allows for : + * - better RTT estimation and ACK scheduling + * - faster recovery + * - high rates */ - if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) { + limit = max(skb->truesize, sk->sk_pacing_rate >> 10); + + if (atomic_read(&sk->sk_wmem_alloc) > limit) { set_bit(TSQ_THROTTLED, &tp->tsq_flags); break; } + limit = mss_now; if (tso_segs > 1 && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 74d2c95db57f..0ca44df51ee9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -658,7 +658,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) break; case ICMP_REDIRECT: ipv4_sk_redirect(skb, sk); - break; + goto out; } /* diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d6ff12617f36..cd3fb301da38 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1499,6 +1499,33 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, return false; } +/* Compares an address/prefix_len with addresses on device @dev. + * If one is found it returns true. + */ +bool ipv6_chk_custom_prefix(const struct in6_addr *addr, + const unsigned int prefix_len, struct net_device *dev) +{ + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + bool ret = false; + + rcu_read_lock(); + idev = __in6_dev_get(dev); + if (idev) { + read_lock_bh(&idev->lock); + list_for_each_entry(ifa, &idev->addr_list, if_list) { + ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len); + if (ret) + break; + } + read_unlock_bh(&idev->lock); + } + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(ipv6_chk_custom_prefix); + int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev) { struct inet6_dev *idev; @@ -2193,43 +2220,21 @@ ok: else stored_lft = 0; if (!update_lft && !create && stored_lft) { - if (valid_lft > MIN_VALID_LIFETIME || - valid_lft > stored_lft) - update_lft = 1; - else if (stored_lft <= MIN_VALID_LIFETIME) { - /* valid_lft <= stored_lft is always true */ - /* - * RFC 4862 Section 5.5.3e: - * "Note that the preferred lifetime of - * the corresponding address is always - * reset to the Preferred Lifetime in - * the received Prefix Information - * option, regardless of whether the - * valid lifetime is also reset or - * ignored." - * - * So if the preferred lifetime in - * this advertisement is different - * than what we have stored, but the - * valid lifetime is invalid, just - * reset prefered_lft. - * - * We must set the valid lifetime - * to the stored lifetime since we'll - * be updating the timestamp below, - * else we'll set it back to the - * minimum. - */ - if (prefered_lft != ifp->prefered_lft) { - valid_lft = stored_lft; - update_lft = 1; - } - } else { - valid_lft = MIN_VALID_LIFETIME; - if (valid_lft < prefered_lft) - prefered_lft = valid_lft; - update_lft = 1; - } + const u32 minimum_lft = min( + stored_lft, (u32)MIN_VALID_LIFETIME); + valid_lft = max(valid_lft, minimum_lft); + + /* RFC4862 Section 5.5.3e: + * "Note that the preferred lifetime of the + * corresponding address is always reset to + * the Preferred Lifetime in the received + * Prefix Information option, regardless of + * whether the valid lifetime is also reset or + * ignored." + * + * So we should always update prefered_lft here. + */ + update_lft = 1; } if (update_lft) { diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 6b26e9feafb9..7bb5446b9d73 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -618,7 +618,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, struct ip6_tnl *tunnel = netdev_priv(dev); struct net_device *tdev; /* Device to other host */ struct ipv6hdr *ipv6h; /* Our new IP header */ - unsigned int max_headroom; /* The extra header space needed */ + unsigned int max_headroom = 0; /* The extra header space needed */ int gre_hlen; struct ipv6_tel_txoption opt; int mtu; @@ -693,7 +693,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); - max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len; + max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len; if (skb_headroom(skb) < max_headroom || skb_shared(skb) || (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3a692d529163..a54c45ce4a48 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1015,6 +1015,8 @@ static inline int ip6_ufo_append_data(struct sock *sk, * udp datagram */ if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { + struct frag_hdr fhdr; + skb = sock_alloc_send_skb(sk, hh_len + fragheaderlen + transhdrlen + 20, (flags & MSG_DONTWAIT), &err); @@ -1036,12 +1038,6 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb->protocol = htons(ETH_P_IPV6); skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; - } - - err = skb_append_datato_frags(sk,skb, getfrag, from, - (length - transhdrlen)); - if (!err) { - struct frag_hdr fhdr; /* Specify the length of each IPv6 datagram fragment. * It has to be a multiple of 8. @@ -1052,15 +1048,10 @@ static inline int ip6_ufo_append_data(struct sock *sk, ipv6_select_ident(&fhdr, rt); skb_shinfo(skb)->ip6_frag_id = fhdr.identification; __skb_queue_tail(&sk->sk_write_queue, skb); - - return 0; } - /* There is not enough support do UPD LSO, - * so follow normal path - */ - kfree_skb(skb); - return err; + return skb_append_datato_frags(sk, skb, getfrag, from, + (length - transhdrlen)); } static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, @@ -1227,27 +1218,27 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, * --yoshfuji */ - cork->length += length; - if (length > mtu) { - int proto = sk->sk_protocol; - if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ - ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); - return -EMSGSIZE; - } - - if (proto == IPPROTO_UDP && - (rt->dst.dev->features & NETIF_F_UFO)) { + if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP || + sk->sk_protocol == IPPROTO_RAW)) { + ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); + return -EMSGSIZE; + } - err = ip6_ufo_append_data(sk, getfrag, from, length, - hh_len, fragheaderlen, - transhdrlen, mtu, flags, rt); - if (err) - goto error; - return 0; - } + skb = skb_peek_tail(&sk->sk_write_queue); + cork->length += length; + if (((length > mtu) || + (skb && skb_is_gso(skb))) && + (sk->sk_protocol == IPPROTO_UDP) && + (rt->dst.dev->features & NETIF_F_UFO)) { + err = ip6_ufo_append_data(sk, getfrag, from, length, + hh_len, fragheaderlen, + transhdrlen, mtu, flags, rt); + if (err) + goto error; + return 0; } - if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) + if (!skb) goto alloc_new_skb; while (length > 0) { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 2d8f4829575b..a791552e0422 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1731,8 +1731,6 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) } } - t = rtnl_dereference(ip6n->tnls_wc[0]); - unregister_netdevice_queue(t->dev, &list); unregister_netdevice_many(&list); } @@ -1752,6 +1750,7 @@ static int __net_init ip6_tnl_init_net(struct net *net) if (!ip6n->fb_tnl_dev) goto err_alloc_dev; dev_net_set(ip6n->fb_tnl_dev, net); + ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops; /* FB netdevice is special: we have one, and only one per netns. * Allowing to move it to another netns is clearly unsafe. */ diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 096cd67b737c..d18f9f903db6 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2034,7 +2034,7 @@ static void mld_dad_timer_expire(unsigned long data) if (idev->mc_dad_count) mld_dad_start_timer(idev, idev->mc_maxdelay); } - __in6_dev_put(idev); + in6_dev_put(idev); } static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, @@ -2379,7 +2379,7 @@ static void mld_gq_timer_expire(unsigned long data) idev->mc_gq_running = 0; mld_send_report(idev, NULL); - __in6_dev_put(idev); + in6_dev_put(idev); } static void mld_ifc_timer_expire(unsigned long data) @@ -2392,7 +2392,7 @@ static void mld_ifc_timer_expire(unsigned long data) if (idev->mc_ifc_count) mld_ifc_start_timer(idev, idev->mc_maxdelay); } - __in6_dev_put(idev); + in6_dev_put(idev); } static void mld_ifc_event(struct inet6_dev *idev) diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 19cfea8dbcaa..2748b042da72 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -282,7 +282,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) if (th == NULL) return NF_DROP; - synproxy_parse_options(skb, par->thoff, th, &opts); + if (!synproxy_parse_options(skb, par->thoff, th, &opts)) + return NF_DROP; if (th->syn && !(th->ack || th->fin || th->rst)) { /* Initial SYN from client */ @@ -372,7 +373,8 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum, /* fall through */ case TCP_CONNTRACK_SYN_SENT: - synproxy_parse_options(skb, thoff, th, &opts); + if (!synproxy_parse_options(skb, thoff, th, &opts)) + return NF_DROP; if (!th->syn && th->ack && CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { @@ -395,7 +397,9 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum, if (!th->syn || !th->ack) break; - synproxy_parse_options(skb, thoff, th, &opts); + if (!synproxy_parse_options(skb, thoff, th, &opts)) + return NF_DROP; + if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy->tsoff = opts.tsval - synproxy->its; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 58916bbb1728..a4ed2416399e 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -335,8 +335,10 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, ip6_sk_update_pmtu(skb, sk, info); harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); } - if (type == NDISC_REDIRECT) + if (type == NDISC_REDIRECT) { ip6_sk_redirect(skb, sk); + return; + } if (np->recverr) { u8 *payload = skb->data; if (!inet->hdrincl) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 7ee5cb96db34..19269453a8ea 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -566,6 +566,70 @@ static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr, return false; } +/* Checks if an address matches an address on the tunnel interface. + * Used to detect the NAT of proto 41 packets and let them pass spoofing test. + * Long story: + * This function is called after we considered the packet as spoofed + * in is_spoofed_6rd. + * We may have a router that is doing NAT for proto 41 packets + * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb + * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd + * function will return true, dropping the packet. + * But, we can still check if is spoofed against the IP + * addresses associated with the interface. + */ +static bool only_dnatted(const struct ip_tunnel *tunnel, + const struct in6_addr *v6dst) +{ + int prefix_len; + +#ifdef CONFIG_IPV6_SIT_6RD + prefix_len = tunnel->ip6rd.prefixlen + 32 + - tunnel->ip6rd.relay_prefixlen; +#else + prefix_len = 48; +#endif + return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev); +} + +/* Returns true if a packet is spoofed */ +static bool packet_is_spoofed(struct sk_buff *skb, + const struct iphdr *iph, + struct ip_tunnel *tunnel) +{ + const struct ipv6hdr *ipv6h; + + if (tunnel->dev->priv_flags & IFF_ISATAP) { + if (!isatap_chksrc(skb, iph, tunnel)) + return true; + + return false; + } + + if (tunnel->dev->flags & IFF_POINTOPOINT) + return false; + + ipv6h = ipv6_hdr(skb); + + if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) { + net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n", + &iph->saddr, &ipv6h->saddr, + &iph->daddr, &ipv6h->daddr); + return true; + } + + if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr))) + return false; + + if (only_dnatted(tunnel, &ipv6h->daddr)) + return false; + + net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n", + &iph->saddr, &ipv6h->saddr, + &iph->daddr, &ipv6h->daddr); + return true; +} + static int ipip6_rcv(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); @@ -586,19 +650,9 @@ static int ipip6_rcv(struct sk_buff *skb) IPCB(skb)->flags = 0; skb->protocol = htons(ETH_P_IPV6); - if (tunnel->dev->priv_flags & IFF_ISATAP) { - if (!isatap_chksrc(skb, iph, tunnel)) { - tunnel->dev->stats.rx_errors++; - goto out; - } - } else if (!(tunnel->dev->flags&IFF_POINTOPOINT)) { - if (is_spoofed_6rd(tunnel, iph->saddr, - &ipv6_hdr(skb)->saddr) || - is_spoofed_6rd(tunnel, iph->daddr, - &ipv6_hdr(skb)->daddr)) { - tunnel->dev->stats.rx_errors++; - goto out; - } + if (packet_is_spoofed(skb, iph, tunnel)) { + tunnel->dev->stats.rx_errors++; + goto out; } __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); @@ -748,7 +802,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); if (neigh == NULL) { - net_dbg_ratelimited("sit: nexthop == NULL\n"); + net_dbg_ratelimited("nexthop == NULL\n"); goto tx_error; } @@ -777,7 +831,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); if (neigh == NULL) { - net_dbg_ratelimited("sit: nexthop == NULL\n"); + net_dbg_ratelimited("nexthop == NULL\n"); goto tx_error; } @@ -1612,6 +1666,7 @@ static int __net_init sit_init_net(struct net *net) goto err_alloc_dev; } dev_net_set(sitn->fb_tunnel_dev, net); + sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops; /* FB netdevice is special: we have one, and only one per netns. * Allowing to move it to another netns is clearly unsafe. */ @@ -1646,7 +1701,6 @@ static void __net_exit sit_exit_net(struct net *net) rtnl_lock(); sit_destroy_tunnels(sitn, &list); - unregister_netdevice_queue(sitn->fb_tunnel_dev, &list); unregister_netdevice_many(&list); rtnl_unlock(); } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f4058150262b..72b7eaaf3ca0 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -525,8 +525,10 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == ICMPV6_PKT_TOOBIG) ip6_sk_update_pmtu(skb, sk, info); - if (type == NDISC_REDIRECT) + if (type == NDISC_REDIRECT) { ip6_sk_redirect(skb, sk); + goto out; + } np = inet6_sk(sk); diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c index 54563ad8aeb1..355cc3b6fa4d 100644 --- a/net/lapb/lapb_timer.c +++ b/net/lapb/lapb_timer.c @@ -154,6 +154,7 @@ static void lapb_t1timer_expiry(unsigned long param) } else { lapb->n2count++; lapb_requeue_frames(lapb); + lapb_kick(lapb); } break; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 4f69e83ff836..74fd00c27210 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -116,6 +116,7 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { struct ip_vs_cpu_stats *s; + struct ip_vs_service *svc; s = this_cpu_ptr(dest->stats.cpustats); s->ustats.inpkts++; @@ -123,11 +124,14 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) s->ustats.inbytes += skb->len; u64_stats_update_end(&s->syncp); - s = this_cpu_ptr(dest->svc->stats.cpustats); + rcu_read_lock(); + svc = rcu_dereference(dest->svc); + s = this_cpu_ptr(svc->stats.cpustats); s->ustats.inpkts++; u64_stats_update_begin(&s->syncp); s->ustats.inbytes += skb->len; u64_stats_update_end(&s->syncp); + rcu_read_unlock(); s = this_cpu_ptr(ipvs->tot_stats.cpustats); s->ustats.inpkts++; @@ -146,6 +150,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { struct ip_vs_cpu_stats *s; + struct ip_vs_service *svc; s = this_cpu_ptr(dest->stats.cpustats); s->ustats.outpkts++; @@ -153,11 +158,14 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) s->ustats.outbytes += skb->len; u64_stats_update_end(&s->syncp); - s = this_cpu_ptr(dest->svc->stats.cpustats); + rcu_read_lock(); + svc = rcu_dereference(dest->svc); + s = this_cpu_ptr(svc->stats.cpustats); s->ustats.outpkts++; u64_stats_update_begin(&s->syncp); s->ustats.outbytes += skb->len; u64_stats_update_end(&s->syncp); + rcu_read_unlock(); s = this_cpu_ptr(ipvs->tot_stats.cpustats); s->ustats.outpkts++; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c8148e487386..a3df9bddc4f7 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -460,7 +460,7 @@ static inline void __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) { atomic_inc(&svc->refcnt); - dest->svc = svc; + rcu_assign_pointer(dest->svc, svc); } static void ip_vs_service_free(struct ip_vs_service *svc) @@ -470,18 +470,25 @@ static void ip_vs_service_free(struct ip_vs_service *svc) kfree(svc); } -static void -__ip_vs_unbind_svc(struct ip_vs_dest *dest) +static void ip_vs_service_rcu_free(struct rcu_head *head) { - struct ip_vs_service *svc = dest->svc; + struct ip_vs_service *svc; + + svc = container_of(head, struct ip_vs_service, rcu_head); + ip_vs_service_free(svc); +} - dest->svc = NULL; +static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay) +{ if (atomic_dec_and_test(&svc->refcnt)) { IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", svc->fwmark, IP_VS_DBG_ADDR(svc->af, &svc->addr), ntohs(svc->port)); - ip_vs_service_free(svc); + if (do_delay) + call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); + else + ip_vs_service_free(svc); } } @@ -667,11 +674,6 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->refcnt)); - /* We can not reuse dest while in grace period - * because conns still can use dest->svc - */ - if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) - continue; if (dest->af == svc->af && ip_vs_addr_equal(svc->af, &dest->addr, daddr) && dest->port == dport && @@ -697,8 +699,10 @@ out: static void ip_vs_dest_free(struct ip_vs_dest *dest) { + struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1); + __ip_vs_dst_cache_reset(dest); - __ip_vs_unbind_svc(dest); + __ip_vs_svc_put(svc, false); free_percpu(dest->stats.cpustats); kfree(dest); } @@ -771,6 +775,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest, int add) { struct netns_ipvs *ipvs = net_ipvs(svc->net); + struct ip_vs_service *old_svc; struct ip_vs_scheduler *sched; int conn_flags; @@ -792,13 +797,14 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, atomic_set(&dest->conn_flags, conn_flags); /* bind the service */ - if (!dest->svc) { + old_svc = rcu_dereference_protected(dest->svc, 1); + if (!old_svc) { __ip_vs_bind_svc(dest, svc); } else { - if (dest->svc != svc) { - __ip_vs_unbind_svc(dest); + if (old_svc != svc) { ip_vs_zero_stats(&dest->stats); __ip_vs_bind_svc(dest, svc); + __ip_vs_svc_put(old_svc, true); } } @@ -998,16 +1004,6 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) return 0; } -static void ip_vs_dest_wait_readers(struct rcu_head *head) -{ - struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest, - rcu_head); - - /* End of grace period after unlinking */ - clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); -} - - /* * Delete a destination (must be already unlinked from the service) */ @@ -1023,20 +1019,16 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest, */ ip_vs_rs_unhash(dest); - if (!cleanup) { - set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); - call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers); - } - spin_lock_bh(&ipvs->dest_trash_lock); IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->refcnt)); if (list_empty(&ipvs->dest_trash) && !cleanup) mod_timer(&ipvs->dest_trash_timer, - jiffies + IP_VS_DEST_TRASH_PERIOD); + jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); /* dest lives in trash without reference */ list_add(&dest->t_list, &ipvs->dest_trash); + dest->idle_start = 0; spin_unlock_bh(&ipvs->dest_trash_lock); ip_vs_dest_put(dest); } @@ -1108,24 +1100,30 @@ static void ip_vs_dest_trash_expire(unsigned long data) struct net *net = (struct net *) data; struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_dest *dest, *next; + unsigned long now = jiffies; spin_lock(&ipvs->dest_trash_lock); list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { - /* Skip if dest is in grace period */ - if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) - continue; if (atomic_read(&dest->refcnt) > 0) continue; + if (dest->idle_start) { + if (time_before(now, dest->idle_start + + IP_VS_DEST_TRASH_PERIOD)) + continue; + } else { + dest->idle_start = max(1UL, now); + continue; + } IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n", dest->vfwmark, - IP_VS_DBG_ADDR(dest->svc->af, &dest->addr), + IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port)); list_del(&dest->t_list); ip_vs_dest_free(dest); } if (!list_empty(&ipvs->dest_trash)) mod_timer(&ipvs->dest_trash_timer, - jiffies + IP_VS_DEST_TRASH_PERIOD); + jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); spin_unlock(&ipvs->dest_trash_lock); } @@ -1320,14 +1318,6 @@ out: return ret; } -static void ip_vs_service_rcu_free(struct rcu_head *head) -{ - struct ip_vs_service *svc; - - svc = container_of(head, struct ip_vs_service, rcu_head); - ip_vs_service_free(svc); -} - /* * Delete a service from the service list * - The service must be unlinked, unlocked and not referenced! @@ -1376,13 +1366,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) /* * Free the service if nobody refers to it */ - if (atomic_dec_and_test(&svc->refcnt)) { - IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", - svc->fwmark, - IP_VS_DBG_ADDR(svc->af, &svc->addr), - ntohs(svc->port)); - call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); - } + __ip_vs_svc_put(svc, true); /* decrease the module use count */ ip_vs_use_count_dec(); diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 6bee6d0c73a5..1425e9a924c4 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -59,12 +59,13 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, struct ip_vs_cpu_stats __percpu *stats) { int i; + bool add = false; for_each_possible_cpu(i) { struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i); unsigned int start; __u64 inbytes, outbytes; - if (i) { + if (add) { sum->conns += s->ustats.conns; sum->inpkts += s->ustats.inpkts; sum->outpkts += s->ustats.outpkts; @@ -76,6 +77,7 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, sum->inbytes += inbytes; sum->outbytes += outbytes; } else { + add = true; sum->conns = s->ustats.conns; sum->inpkts = s->ustats.inpkts; sum->outpkts = s->ustats.outpkts; diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 1383b0eadc0e..eff13c94498e 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -93,7 +93,7 @@ struct ip_vs_lblc_entry { struct hlist_node list; int af; /* address family */ union nf_inet_addr addr; /* destination IP address */ - struct ip_vs_dest __rcu *dest; /* real server (cache) */ + struct ip_vs_dest *dest; /* real server (cache) */ unsigned long lastuse; /* last used time */ struct rcu_head rcu_head; }; @@ -130,20 +130,21 @@ static struct ctl_table vs_vars_table[] = { }; #endif -static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) +static void ip_vs_lblc_rcu_free(struct rcu_head *head) { - struct ip_vs_dest *dest; + struct ip_vs_lblc_entry *en = container_of(head, + struct ip_vs_lblc_entry, + rcu_head); - hlist_del_rcu(&en->list); - /* - * We don't kfree dest because it is referred either by its service - * or the trash dest list. - */ - dest = rcu_dereference_protected(en->dest, 1); - ip_vs_dest_put(dest); - kfree_rcu(en, rcu_head); + ip_vs_dest_put(en->dest); + kfree(en); } +static inline void ip_vs_lblc_del(struct ip_vs_lblc_entry *en) +{ + hlist_del_rcu(&en->list); + call_rcu(&en->rcu_head, ip_vs_lblc_rcu_free); +} /* * Returns hash value for IPVS LBLC entry @@ -203,30 +204,23 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr, struct ip_vs_lblc_entry *en; en = ip_vs_lblc_get(dest->af, tbl, daddr); - if (!en) { - en = kmalloc(sizeof(*en), GFP_ATOMIC); - if (!en) - return NULL; - - en->af = dest->af; - ip_vs_addr_copy(dest->af, &en->addr, daddr); - en->lastuse = jiffies; + if (en) { + if (en->dest == dest) + return en; + ip_vs_lblc_del(en); + } + en = kmalloc(sizeof(*en), GFP_ATOMIC); + if (!en) + return NULL; - ip_vs_dest_hold(dest); - RCU_INIT_POINTER(en->dest, dest); + en->af = dest->af; + ip_vs_addr_copy(dest->af, &en->addr, daddr); + en->lastuse = jiffies; - ip_vs_lblc_hash(tbl, en); - } else { - struct ip_vs_dest *old_dest; + ip_vs_dest_hold(dest); + en->dest = dest; - old_dest = rcu_dereference_protected(en->dest, 1); - if (old_dest != dest) { - ip_vs_dest_put(old_dest); - ip_vs_dest_hold(dest); - /* No ordering constraints for refcnt */ - RCU_INIT_POINTER(en->dest, dest); - } - } + ip_vs_lblc_hash(tbl, en); return en; } @@ -246,7 +240,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc) tbl->dead = 1; for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { - ip_vs_lblc_free(en); + ip_vs_lblc_del(en); atomic_dec(&tbl->entries); } } @@ -281,7 +275,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) sysctl_lblc_expiration(svc))) continue; - ip_vs_lblc_free(en); + ip_vs_lblc_del(en); atomic_dec(&tbl->entries); } spin_unlock(&svc->sched_lock); @@ -335,7 +329,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) if (time_before(now, en->lastuse + ENTRY_TIMEOUT)) continue; - ip_vs_lblc_free(en); + ip_vs_lblc_del(en); atomic_dec(&tbl->entries); goal--; } @@ -443,8 +437,8 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc) continue; doh = ip_vs_dest_conn_overhead(dest); - if (loh * atomic_read(&dest->weight) > - doh * atomic_read(&least->weight)) { + if ((__s64)loh * atomic_read(&dest->weight) > + (__s64)doh * atomic_read(&least->weight)) { least = dest; loh = doh; } @@ -511,7 +505,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, * free up entries from the trash at any time. */ - dest = rcu_dereference(en->dest); + dest = en->dest; if ((dest->flags & IP_VS_DEST_F_AVAILABLE) && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc)) goto out; @@ -631,7 +625,7 @@ static void __exit ip_vs_lblc_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); unregister_pernet_subsys(&ip_vs_lblc_ops); - synchronize_rcu(); + rcu_barrier(); } diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 5199448697f6..0b8550089a2e 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -89,7 +89,7 @@ */ struct ip_vs_dest_set_elem { struct list_head list; /* list link */ - struct ip_vs_dest __rcu *dest; /* destination server */ + struct ip_vs_dest *dest; /* destination server */ struct rcu_head rcu_head; }; @@ -107,11 +107,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set, if (check) { list_for_each_entry(e, &set->list, list) { - struct ip_vs_dest *d; - - d = rcu_dereference_protected(e->dest, 1); - if (d == dest) - /* already existed */ + if (e->dest == dest) return; } } @@ -121,7 +117,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set, return; ip_vs_dest_hold(dest); - RCU_INIT_POINTER(e->dest, dest); + e->dest = dest; list_add_rcu(&e->list, &set->list); atomic_inc(&set->size); @@ -129,22 +125,27 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set, set->lastmod = jiffies; } +static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head) +{ + struct ip_vs_dest_set_elem *e; + + e = container_of(head, struct ip_vs_dest_set_elem, rcu_head); + ip_vs_dest_put(e->dest); + kfree(e); +} + static void ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) { struct ip_vs_dest_set_elem *e; list_for_each_entry(e, &set->list, list) { - struct ip_vs_dest *d; - - d = rcu_dereference_protected(e->dest, 1); - if (d == dest) { + if (e->dest == dest) { /* HIT */ atomic_dec(&set->size); set->lastmod = jiffies; - ip_vs_dest_put(dest); list_del_rcu(&e->list); - kfree_rcu(e, rcu_head); + call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free); break; } } @@ -155,16 +156,8 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) struct ip_vs_dest_set_elem *e, *ep; list_for_each_entry_safe(e, ep, &set->list, list) { - struct ip_vs_dest *d; - - d = rcu_dereference_protected(e->dest, 1); - /* - * We don't kfree dest because it is referred either - * by its service or by the trash dest list. - */ - ip_vs_dest_put(d); list_del_rcu(&e->list); - kfree_rcu(e, rcu_head); + call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free); } } @@ -175,12 +168,9 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) struct ip_vs_dest *dest, *least; int loh, doh; - if (set == NULL) - return NULL; - /* select the first destination server, whose weight > 0 */ list_for_each_entry_rcu(e, &set->list, list) { - least = rcu_dereference(e->dest); + least = e->dest; if (least->flags & IP_VS_DEST_F_OVERLOAD) continue; @@ -195,13 +185,13 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) /* find the destination with the weighted least load */ nextstage: list_for_each_entry_continue_rcu(e, &set->list, list) { - dest = rcu_dereference(e->dest); + dest = e->dest; if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; doh = ip_vs_dest_conn_overhead(dest); - if ((loh * atomic_read(&dest->weight) > - doh * atomic_read(&least->weight)) + if (((__s64)loh * atomic_read(&dest->weight) > + (__s64)doh * atomic_read(&least->weight)) && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { least = dest; loh = doh; @@ -232,7 +222,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) /* select the first destination server, whose weight > 0 */ list_for_each_entry(e, &set->list, list) { - most = rcu_dereference_protected(e->dest, 1); + most = e->dest; if (atomic_read(&most->weight) > 0) { moh = ip_vs_dest_conn_overhead(most); goto nextstage; @@ -243,11 +233,11 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) /* find the destination with the weighted most load */ nextstage: list_for_each_entry_continue(e, &set->list, list) { - dest = rcu_dereference_protected(e->dest, 1); + dest = e->dest; doh = ip_vs_dest_conn_overhead(dest); /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */ - if ((moh * atomic_read(&dest->weight) < - doh * atomic_read(&most->weight)) + if (((__s64)moh * atomic_read(&dest->weight) < + (__s64)doh * atomic_read(&most->weight)) && (atomic_read(&dest->weight) > 0)) { most = dest; moh = doh; @@ -611,8 +601,8 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc) continue; doh = ip_vs_dest_conn_overhead(dest); - if (loh * atomic_read(&dest->weight) > - doh * atomic_read(&least->weight)) { + if ((__s64)loh * atomic_read(&dest->weight) > + (__s64)doh * atomic_read(&least->weight)) { least = dest; loh = doh; } @@ -819,7 +809,7 @@ static void __exit ip_vs_lblcr_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); unregister_pernet_subsys(&ip_vs_lblcr_ops); - synchronize_rcu(); + rcu_barrier(); } diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index d8d9860934fe..961a6de9bb29 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -40,7 +40,7 @@ #include <net/ip_vs.h> -static inline unsigned int +static inline int ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) { /* @@ -59,7 +59,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least = NULL; - unsigned int loh = 0, doh; + int loh = 0, doh; IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); @@ -92,8 +92,8 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, } if (!least || - (loh * atomic_read(&dest->weight) > - doh * atomic_read(&least->weight))) { + ((__s64)loh * atomic_read(&dest->weight) > + (__s64)doh * atomic_read(&least->weight))) { least = dest; loh = doh; } diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index a5284cc3d882..e446b9fa7424 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -44,7 +44,7 @@ #include <net/ip_vs.h> -static inline unsigned int +static inline int ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) { /* @@ -63,7 +63,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least; - unsigned int loh, doh; + int loh, doh; IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); @@ -99,8 +99,8 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; doh = ip_vs_sed_dest_overhead(dest); - if (loh * atomic_read(&dest->weight) > - doh * atomic_read(&least->weight)) { + if ((__s64)loh * atomic_read(&dest->weight) > + (__s64)doh * atomic_read(&least->weight)) { least = dest; loh = doh; } diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index 6dc1fa128840..b5b4650d50a9 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -35,7 +35,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least; - unsigned int loh, doh; + int loh, doh; IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n"); @@ -71,8 +71,8 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; doh = ip_vs_dest_conn_overhead(dest); - if (loh * atomic_read(&dest->weight) > - doh * atomic_read(&least->weight)) { + if ((__s64)loh * atomic_read(&dest->weight) > + (__s64)doh * atomic_read(&least->weight)) { least = dest; loh = doh; } diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 6fd967c6278c..cdf4567ba9b3 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -24,7 +24,7 @@ int synproxy_net_id; EXPORT_SYMBOL_GPL(synproxy_net_id); -void +bool synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, const struct tcphdr *th, struct synproxy_options *opts) { @@ -32,7 +32,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, u8 buf[40], *ptr; ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf); - BUG_ON(ptr == NULL); + if (ptr == NULL) + return false; opts->options = 0; while (length > 0) { @@ -41,16 +42,16 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, switch (opcode) { case TCPOPT_EOL: - return; + return true; case TCPOPT_NOP: length--; continue; default: opsize = *ptr++; if (opsize < 2) - return; + return true; if (opsize > length) - return; + return true; switch (opcode) { case TCPOPT_MSS: @@ -84,6 +85,7 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, length -= opsize; } } + return true; } EXPORT_SYMBOL_GPL(synproxy_parse_options); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 32ad015ee8ce..a2fef8b10b96 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -285,7 +285,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) /* remove one skb from head of flow queue */ -static struct sk_buff *fq_dequeue_head(struct fq_flow *flow) +static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow) { struct sk_buff *skb = flow->head; @@ -293,6 +293,8 @@ static struct sk_buff *fq_dequeue_head(struct fq_flow *flow) flow->head = skb->next; skb->next = NULL; flow->qlen--; + sch->qstats.backlog -= qdisc_pkt_len(skb); + sch->q.qlen--; } return skb; } @@ -418,8 +420,9 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) struct fq_flow_head *head; struct sk_buff *skb; struct fq_flow *f; + u32 rate; - skb = fq_dequeue_head(&q->internal); + skb = fq_dequeue_head(sch, &q->internal); if (skb) goto out; fq_check_throttled(q, now); @@ -449,7 +452,7 @@ begin: goto begin; } - skb = fq_dequeue_head(f); + skb = fq_dequeue_head(sch, f); if (!skb) { head->first = f->next; /* force a pass through old_flows to prevent starvation */ @@ -466,43 +469,74 @@ begin: f->time_next_packet = now; f->credit -= qdisc_pkt_len(skb); - if (f->credit <= 0 && - q->rate_enable && - skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) { - u32 rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate; + if (f->credit > 0 || !q->rate_enable) + goto out; - rate = min(rate, q->flow_max_rate); - if (rate) { - u64 len = (u64)qdisc_pkt_len(skb) * NSEC_PER_SEC; - - do_div(len, rate); - /* Since socket rate can change later, - * clamp the delay to 125 ms. - * TODO: maybe segment the too big skb, as in commit - * e43ac79a4bc ("sch_tbf: segment too big GSO packets") - */ - if (unlikely(len > 125 * NSEC_PER_MSEC)) { - len = 125 * NSEC_PER_MSEC; - q->stat_pkts_too_long++; - } + if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) { + rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate; - f->time_next_packet = now + len; + rate = min(rate, q->flow_max_rate); + } else { + rate = q->flow_max_rate; + if (rate == ~0U) + goto out; + } + if (rate) { + u32 plen = max(qdisc_pkt_len(skb), q->quantum); + u64 len = (u64)plen * NSEC_PER_SEC; + + do_div(len, rate); + /* Since socket rate can change later, + * clamp the delay to 125 ms. + * TODO: maybe segment the too big skb, as in commit + * e43ac79a4bc ("sch_tbf: segment too big GSO packets") + */ + if (unlikely(len > 125 * NSEC_PER_MSEC)) { + len = 125 * NSEC_PER_MSEC; + q->stat_pkts_too_long++; } + + f->time_next_packet = now + len; } out: - sch->qstats.backlog -= qdisc_pkt_len(skb); qdisc_bstats_update(sch, skb); - sch->q.qlen--; qdisc_unthrottled(sch); return skb; } static void fq_reset(struct Qdisc *sch) { + struct fq_sched_data *q = qdisc_priv(sch); + struct rb_root *root; struct sk_buff *skb; + struct rb_node *p; + struct fq_flow *f; + unsigned int idx; - while ((skb = fq_dequeue(sch)) != NULL) + while ((skb = fq_dequeue_head(sch, &q->internal)) != NULL) kfree_skb(skb); + + if (!q->fq_root) + return; + + for (idx = 0; idx < (1U << q->fq_trees_log); idx++) { + root = &q->fq_root[idx]; + while ((p = rb_first(root)) != NULL) { + f = container_of(p, struct fq_flow, fq_node); + rb_erase(p, root); + + while ((skb = fq_dequeue_head(sch, f)) != NULL) + kfree_skb(skb); + + kmem_cache_free(fq_flow_cachep, f); + } + } + q->new_flows.first = NULL; + q->old_flows.first = NULL; + q->delayed = RB_ROOT; + q->flows = 0; + q->inactive_flows = 0; + q->throttled_flows = 0; } static void fq_rehash(struct fq_sched_data *q, @@ -645,6 +679,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) while (sch->q.qlen > sch->limit) { struct sk_buff *skb = fq_dequeue(sch); + if (!skb) + break; kfree_skb(skb); drop_count++; } @@ -657,21 +693,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) static void fq_destroy(struct Qdisc *sch) { struct fq_sched_data *q = qdisc_priv(sch); - struct rb_root *root; - struct rb_node *p; - unsigned int idx; - if (q->fq_root) { - for (idx = 0; idx < (1U << q->fq_trees_log); idx++) { - root = &q->fq_root[idx]; - while ((p = rb_first(root)) != NULL) { - rb_erase(p, root); - kmem_cache_free(fq_flow_cachep, - container_of(p, struct fq_flow, fq_node)); - } - } - kfree(q->fq_root); - } + fq_reset(sch); + kfree(q->fq_root); qdisc_watchdog_cancel(&q->watchdog); } diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 9bc6db04be3e..e7000be321b0 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -47,12 +47,12 @@ static int net_ctl_permissions(struct ctl_table_header *head, /* Allow network administrator to have same access as root. */ if (ns_capable(net->user_ns, CAP_NET_ADMIN) || - uid_eq(root_uid, current_uid())) { + uid_eq(root_uid, current_euid())) { int mode = (table->mode >> 6) & 7; return (mode << 6) | (mode << 3) | mode; } /* Allow netns root group to have the same access as the root group */ - if (gid_eq(root_gid, current_gid())) { + if (in_egroup_p(root_gid)) { int mode = (table->mode >> 3) & 7; return (mode << 3) | mode; } diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 47016c304c84..66cad506b8a2 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3975,8 +3975,8 @@ sub string_find_replace { # check for new externs in .h files. if ($realfile =~ /\.h$/ && $line =~ /^\+\s*(extern\s+)$Type\s*$Ident\s*\(/s) { - if (WARN("AVOID_EXTERNS", - "extern prototypes should be avoided in .h files\n" . $herecurr) && + if (CHK("AVOID_EXTERNS", + "extern prototypes should be avoided in .h files\n" . $herecurr) && $fix) { $fixed[$linenr - 1] =~ s/(.*)\bextern\b\s*(.*)/$1$2/; } diff --git a/security/apparmor/crypto.c b/security/apparmor/crypto.c index d6222ba4e919..532471d0b3a0 100644 --- a/security/apparmor/crypto.c +++ b/security/apparmor/crypto.c @@ -15,14 +15,14 @@ * it should be. */ -#include <linux/crypto.h> +#include <crypto/hash.h> #include "include/apparmor.h" #include "include/crypto.h" static unsigned int apparmor_hash_size; -static struct crypto_hash *apparmor_tfm; +static struct crypto_shash *apparmor_tfm; unsigned int aa_hash_size(void) { @@ -32,35 +32,33 @@ unsigned int aa_hash_size(void) int aa_calc_profile_hash(struct aa_profile *profile, u32 version, void *start, size_t len) { - struct scatterlist sg[2]; - struct hash_desc desc = { - .tfm = apparmor_tfm, - .flags = 0 - }; + struct { + struct shash_desc shash; + char ctx[crypto_shash_descsize(apparmor_tfm)]; + } desc; int error = -ENOMEM; u32 le32_version = cpu_to_le32(version); if (!apparmor_tfm) return 0; - sg_init_table(sg, 2); - sg_set_buf(&sg[0], &le32_version, 4); - sg_set_buf(&sg[1], (u8 *) start, len); - profile->hash = kzalloc(apparmor_hash_size, GFP_KERNEL); if (!profile->hash) goto fail; - error = crypto_hash_init(&desc); + desc.shash.tfm = apparmor_tfm; + desc.shash.flags = 0; + + error = crypto_shash_init(&desc.shash); if (error) goto fail; - error = crypto_hash_update(&desc, &sg[0], 4); + error = crypto_shash_update(&desc.shash, (u8 *) &le32_version, 4); if (error) goto fail; - error = crypto_hash_update(&desc, &sg[1], len); + error = crypto_shash_update(&desc.shash, (u8 *) start, len); if (error) goto fail; - error = crypto_hash_final(&desc, profile->hash); + error = crypto_shash_final(&desc.shash, profile->hash); if (error) goto fail; @@ -75,19 +73,19 @@ fail: static int __init init_profile_hash(void) { - struct crypto_hash *tfm; + struct crypto_shash *tfm; if (!apparmor_initialized) return 0; - tfm = crypto_alloc_hash("sha1", 0, CRYPTO_ALG_ASYNC); + tfm = crypto_alloc_shash("sha1", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) { int error = PTR_ERR(tfm); AA_ERROR("failed to setup profile sha1 hashing: %d\n", error); return error; } apparmor_tfm = tfm; - apparmor_hash_size = crypto_hash_digestsize(apparmor_tfm); + apparmor_hash_size = crypto_shash_digestsize(apparmor_tfm); aa_info_message("AppArmor sha1 policy hashing enabled"); diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index f2d4b6348cbc..c28b0f20ab53 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -360,7 +360,9 @@ static inline void aa_put_replacedby(struct aa_replacedby *p) static inline void __aa_update_replacedby(struct aa_profile *orig, struct aa_profile *new) { - struct aa_profile *tmp = rcu_dereference(orig->replacedby->profile); + struct aa_profile *tmp; + tmp = rcu_dereference_protected(orig->replacedby->profile, + mutex_is_locked(&orig->ns->lock)); rcu_assign_pointer(orig->replacedby->profile, aa_get_profile(new)); orig->flags |= PFLAG_INVALID; aa_put_profile(tmp); diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 6172509fa2b7..345bec07a27d 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -563,7 +563,8 @@ void __init aa_free_root_ns(void) static void free_replacedby(struct aa_replacedby *r) { if (r) { - aa_put_profile(rcu_dereference(r->profile)); + /* r->profile will not be updated any more as r is dead */ + aa_put_profile(rcu_dereference_protected(r->profile, true)); kzfree(r); } } diff --git a/security/selinux/avc.c b/security/selinux/avc.c index dad36a6ab45f..fc3e6628a864 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -746,7 +746,6 @@ inline int avc_has_perm_noaudit(u32 ssid, u32 tsid, * @tclass: target security class * @requested: requested permissions, interpreted based on @tclass * @auditdata: auxiliary audit data - * @flags: VFS walk flags * * Check the AVC to determine whether the @requested permissions are granted * for the SID pair (@ssid, @tsid), interpreting the permissions @@ -756,17 +755,15 @@ inline int avc_has_perm_noaudit(u32 ssid, u32 tsid, * permissions are granted, -%EACCES if any permissions are denied, or * another -errno upon other errors. */ -int avc_has_perm_flags(u32 ssid, u32 tsid, u16 tclass, - u32 requested, struct common_audit_data *auditdata, - unsigned flags) +int avc_has_perm(u32 ssid, u32 tsid, u16 tclass, + u32 requested, struct common_audit_data *auditdata) { struct av_decision avd; int rc, rc2; rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, 0, &avd); - rc2 = avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata, - flags); + rc2 = avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata); if (rc2) return rc2; return rc; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index a5091ec06aa6..5b5231068516 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1502,7 +1502,7 @@ static int cred_has_capability(const struct cred *cred, rc = avc_has_perm_noaudit(sid, sid, sclass, av, 0, &avd); if (audit == SECURITY_CAP_AUDIT) { - int rc2 = avc_audit(sid, sid, sclass, av, &avd, rc, &ad, 0); + int rc2 = avc_audit(sid, sid, sclass, av, &avd, rc, &ad); if (rc2) return rc2; } @@ -1525,8 +1525,7 @@ static int task_has_system(struct task_struct *tsk, static int inode_has_perm(const struct cred *cred, struct inode *inode, u32 perms, - struct common_audit_data *adp, - unsigned flags) + struct common_audit_data *adp) { struct inode_security_struct *isec; u32 sid; @@ -1539,7 +1538,7 @@ static int inode_has_perm(const struct cred *cred, sid = cred_sid(cred); isec = inode->i_security; - return avc_has_perm_flags(sid, isec->sid, isec->sclass, perms, adp, flags); + return avc_has_perm(sid, isec->sid, isec->sclass, perms, adp); } /* Same as inode_has_perm, but pass explicit audit data containing @@ -1554,7 +1553,7 @@ static inline int dentry_has_perm(const struct cred *cred, ad.type = LSM_AUDIT_DATA_DENTRY; ad.u.dentry = dentry; - return inode_has_perm(cred, inode, av, &ad, 0); + return inode_has_perm(cred, inode, av, &ad); } /* Same as inode_has_perm, but pass explicit audit data containing @@ -1569,7 +1568,7 @@ static inline int path_has_perm(const struct cred *cred, ad.type = LSM_AUDIT_DATA_PATH; ad.u.path = *path; - return inode_has_perm(cred, inode, av, &ad, 0); + return inode_has_perm(cred, inode, av, &ad); } /* Same as path_has_perm, but uses the inode from the file struct. */ @@ -1581,7 +1580,7 @@ static inline int file_path_has_perm(const struct cred *cred, ad.type = LSM_AUDIT_DATA_PATH; ad.u.path = file->f_path; - return inode_has_perm(cred, file_inode(file), av, &ad, 0); + return inode_has_perm(cred, file_inode(file), av, &ad); } /* Check whether a task can use an open file descriptor to @@ -1617,7 +1616,7 @@ static int file_has_perm(const struct cred *cred, /* av is zero if only checking access to the descriptor. */ rc = 0; if (av) - rc = inode_has_perm(cred, inode, av, &ad, 0); + rc = inode_has_perm(cred, inode, av, &ad); out: return rc; diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h index 92d0ab561db8..f53ee3c58d0f 100644 --- a/security/selinux/include/avc.h +++ b/security/selinux/include/avc.h @@ -130,7 +130,7 @@ static inline int avc_audit(u32 ssid, u32 tsid, u16 tclass, u32 requested, struct av_decision *avd, int result, - struct common_audit_data *a, unsigned flags) + struct common_audit_data *a) { u32 audited, denied; audited = avc_audit_required(requested, avd, result, 0, &denied); @@ -138,7 +138,7 @@ static inline int avc_audit(u32 ssid, u32 tsid, return 0; return slow_avc_audit(ssid, tsid, tclass, requested, audited, denied, - a, flags); + a, 0); } #define AVC_STRICT 1 /* Ignore permissive mode. */ @@ -147,17 +147,9 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid, unsigned flags, struct av_decision *avd); -int avc_has_perm_flags(u32 ssid, u32 tsid, - u16 tclass, u32 requested, - struct common_audit_data *auditdata, - unsigned); - -static inline int avc_has_perm(u32 ssid, u32 tsid, - u16 tclass, u32 requested, - struct common_audit_data *auditdata) -{ - return avc_has_perm_flags(ssid, tsid, tclass, requested, auditdata, 0); -} +int avc_has_perm(u32 ssid, u32 tsid, + u16 tclass, u32 requested, + struct common_audit_data *auditdata); u32 avc_policy_seqno(void); diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index 98969541cbcc..bea523a5d852 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -139,6 +139,18 @@ static int snd_compr_open(struct inode *inode, struct file *f) static int snd_compr_free(struct inode *inode, struct file *f) { struct snd_compr_file *data = f->private_data; + struct snd_compr_runtime *runtime = data->stream.runtime; + + switch (runtime->state) { + case SNDRV_PCM_STATE_RUNNING: + case SNDRV_PCM_STATE_DRAINING: + case SNDRV_PCM_STATE_PAUSED: + data->stream.ops->trigger(&data->stream, SNDRV_PCM_TRIGGER_STOP); + break; + default: + break; + } + data->stream.ops->free(&data->stream); kfree(data->stream.runtime->buffer); kfree(data->stream.runtime); @@ -837,7 +849,8 @@ static int snd_compress_dev_disconnect(struct snd_device *device) struct snd_compr *compr; compr = device->device_data; - snd_unregister_device(compr->direction, compr->card, compr->device); + snd_unregister_device(SNDRV_DEVICE_TYPE_COMPRESS, compr->card, + compr->device); return 0; } diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c index 445ca481d8d3..bf578ba2677e 100644 --- a/sound/pci/ac97/ac97_codec.c +++ b/sound/pci/ac97/ac97_codec.c @@ -175,6 +175,7 @@ static const struct ac97_codec_id snd_ac97_codec_ids[] = { { 0x54524106, 0xffffffff, "TR28026", NULL, NULL }, { 0x54524108, 0xffffffff, "TR28028", patch_tritech_tr28028, NULL }, // added by xin jin [07/09/99] { 0x54524123, 0xffffffff, "TR28602", NULL, NULL }, // only guess --jk [TR28023 = eMicro EM28023 (new CT1297)] +{ 0x54584e03, 0xffffffff, "TLV320AIC27", NULL, NULL }, { 0x54584e20, 0xffffffff, "TLC320AD9xC", NULL, NULL }, { 0x56494161, 0xffffffff, "VIA1612A", NULL, NULL }, // modified ICE1232 with S/PDIF { 0x56494170, 0xffffffff, "VIA1617A", patch_vt1617a, NULL }, // modified VT1616 with S/PDIF diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index b524f89a1f13..18d972501585 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -111,6 +111,9 @@ enum { /* 0x0009 - 0x0014 -> 12 test regs */ /* 0x0015 - visibility reg */ +/* Cirrus Logic CS4208 */ +#define CS4208_VENDOR_NID 0x24 + /* * Cirrus Logic CS4210 * @@ -223,6 +226,16 @@ static const struct hda_verb cs_coef_init_verbs[] = { {} /* terminator */ }; +static const struct hda_verb cs4208_coef_init_verbs[] = { + {0x01, AC_VERB_SET_POWER_STATE, 0x00}, /* AFG: D0 */ + {0x24, AC_VERB_SET_PROC_STATE, 0x01}, /* VPW: processing on */ + {0x24, AC_VERB_SET_COEF_INDEX, 0x0033}, + {0x24, AC_VERB_SET_PROC_COEF, 0x0001}, /* A1 ICS */ + {0x24, AC_VERB_SET_COEF_INDEX, 0x0034}, + {0x24, AC_VERB_SET_PROC_COEF, 0x1C01}, /* A1 Enable, A Thresh = 300mV */ + {} /* terminator */ +}; + /* Errata: CS4207 rev C0/C1/C2 Silicon * * http://www.cirrus.com/en/pubs/errata/ER880C3.pdf @@ -295,6 +308,8 @@ static int cs_init(struct hda_codec *codec) /* init_verb sequence for C0/C1/C2 errata*/ snd_hda_sequence_write(codec, cs_errata_init_verbs); snd_hda_sequence_write(codec, cs_coef_init_verbs); + } else if (spec->vendor_nid == CS4208_VENDOR_NID) { + snd_hda_sequence_write(codec, cs4208_coef_init_verbs); } snd_hda_gen_init(codec); @@ -434,6 +449,29 @@ static const struct hda_pintbl mba42_pincfgs[] = { {} /* terminator */ }; +static const struct hda_pintbl mba6_pincfgs[] = { + { 0x10, 0x032120f0 }, /* HP */ + { 0x11, 0x500000f0 }, + { 0x12, 0x90100010 }, /* Speaker */ + { 0x13, 0x500000f0 }, + { 0x14, 0x500000f0 }, + { 0x15, 0x770000f0 }, + { 0x16, 0x770000f0 }, + { 0x17, 0x430000f0 }, + { 0x18, 0x43ab9030 }, /* Mic */ + { 0x19, 0x770000f0 }, + { 0x1a, 0x770000f0 }, + { 0x1b, 0x770000f0 }, + { 0x1c, 0x90a00090 }, + { 0x1d, 0x500000f0 }, + { 0x1e, 0x500000f0 }, + { 0x1f, 0x500000f0 }, + { 0x20, 0x500000f0 }, + { 0x21, 0x430000f0 }, + { 0x22, 0x430000f0 }, + {} /* terminator */ +}; + static void cs420x_fixup_gpio_13(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -556,22 +594,23 @@ static int patch_cs420x(struct hda_codec *codec) /* * CS4208 support: - * Its layout is no longer compatible with CS4206/CS4207, and the generic - * parser seems working fairly well, except for trivial fixups. + * Its layout is no longer compatible with CS4206/CS4207 */ enum { + CS4208_MBA6, CS4208_GPIO0, }; static const struct hda_model_fixup cs4208_models[] = { { .id = CS4208_GPIO0, .name = "gpio0" }, + { .id = CS4208_MBA6, .name = "mba6" }, {} }; static const struct snd_pci_quirk cs4208_fixup_tbl[] = { /* codec SSID */ - SND_PCI_QUIRK(0x106b, 0x7100, "MacBookPro 6,1", CS4208_GPIO0), - SND_PCI_QUIRK(0x106b, 0x7200, "MacBookPro 6,2", CS4208_GPIO0), + SND_PCI_QUIRK(0x106b, 0x7100, "MacBookAir 6,1", CS4208_MBA6), + SND_PCI_QUIRK(0x106b, 0x7200, "MacBookAir 6,2", CS4208_MBA6), {} /* terminator */ }; @@ -588,18 +627,35 @@ static void cs4208_fixup_gpio0(struct hda_codec *codec, } static const struct hda_fixup cs4208_fixups[] = { + [CS4208_MBA6] = { + .type = HDA_FIXUP_PINS, + .v.pins = mba6_pincfgs, + .chained = true, + .chain_id = CS4208_GPIO0, + }, [CS4208_GPIO0] = { .type = HDA_FIXUP_FUNC, .v.func = cs4208_fixup_gpio0, }, }; +/* correct the 0dB offset of input pins */ +static void cs4208_fix_amp_caps(struct hda_codec *codec, hda_nid_t adc) +{ + unsigned int caps; + + caps = query_amp_caps(codec, adc, HDA_INPUT); + caps &= ~(AC_AMPCAP_OFFSET); + caps |= 0x02; + snd_hda_override_amp_caps(codec, adc, HDA_INPUT, caps); +} + static int patch_cs4208(struct hda_codec *codec) { struct cs_spec *spec; int err; - spec = cs_alloc_spec(codec, 0); /* no specific w/a */ + spec = cs_alloc_spec(codec, CS4208_VENDOR_NID); if (!spec) return -ENOMEM; @@ -609,6 +665,12 @@ static int patch_cs4208(struct hda_codec *codec) cs4208_fixups); snd_hda_apply_fixup(codec, HDA_FIXUP_ACT_PRE_PROBE); + snd_hda_override_wcaps(codec, 0x18, + get_wcaps(codec, 0x18) | AC_WCAP_STEREO); + cs4208_fix_amp_caps(codec, 0x18); + cs4208_fix_amp_caps(codec, 0x1b); + cs4208_fix_amp_caps(codec, 0x1c); + err = cs_parse_auto_config(codec); if (err < 0) goto error; diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 4edd2d0f9a3c..ec68eaea0336 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -3231,6 +3231,7 @@ enum { CXT_FIXUP_INC_MIC_BOOST, CXT_FIXUP_HEADPHONE_MIC_PIN, CXT_FIXUP_HEADPHONE_MIC, + CXT_FIXUP_GPIO1, }; static void cxt_fixup_stereo_dmic(struct hda_codec *codec, @@ -3375,6 +3376,15 @@ static const struct hda_fixup cxt_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = cxt_fixup_headphone_mic, }, + [CXT_FIXUP_GPIO1] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + { 0x01, AC_VERB_SET_GPIO_MASK, 0x01 }, + { 0x01, AC_VERB_SET_GPIO_DIRECTION, 0x01 }, + { 0x01, AC_VERB_SET_GPIO_DATA, 0x01 }, + { } + }, + }, }; static const struct snd_pci_quirk cxt5051_fixups[] = { @@ -3384,6 +3394,7 @@ static const struct snd_pci_quirk cxt5051_fixups[] = { static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x1025, 0x0543, "Acer Aspire One 522", CXT_FIXUP_STEREO_DMIC), + SND_PCI_QUIRK(0x1025, 0x054c, "Acer Aspire 3830TG", CXT_FIXUP_GPIO1), SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x215e, "Lenovo T410", CXT_PINCFG_LENOVO_TP410), diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 3d8cd04455a6..50173d412ac5 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -937,6 +937,14 @@ static void hdmi_setup_audio_infoframe(struct hda_codec *codec, } /* + * always configure channel mapping, it may have been changed by the + * user in the meantime + */ + hdmi_setup_channel_mapping(codec, pin_nid, non_pcm, ca, + channels, per_pin->chmap, + per_pin->chmap_set); + + /* * sizeof(ai) is used instead of sizeof(*hdmi_ai) or * sizeof(*dp_ai) to avoid partial match/update problems when * the user switches between HDMI/DP monitors. @@ -947,20 +955,10 @@ static void hdmi_setup_audio_infoframe(struct hda_codec *codec, "pin=%d channels=%d\n", pin_nid, channels); - hdmi_setup_channel_mapping(codec, pin_nid, non_pcm, ca, - channels, per_pin->chmap, - per_pin->chmap_set); hdmi_stop_infoframe_trans(codec, pin_nid); hdmi_fill_audio_infoframe(codec, pin_nid, ai.bytes, sizeof(ai)); hdmi_start_infoframe_trans(codec, pin_nid); - } else { - /* For non-pcm audio switch, setup new channel mapping - * accordingly */ - if (per_pin->non_pcm != non_pcm) - hdmi_setup_channel_mapping(codec, pin_nid, non_pcm, ca, - channels, per_pin->chmap, - per_pin->chmap_set); } per_pin->non_pcm = non_pcm; @@ -1149,32 +1147,43 @@ static int hdmi_choose_cvt(struct hda_codec *codec, } static void haswell_config_cvts(struct hda_codec *codec, - int pin_id, int mux_id) + hda_nid_t pin_nid, int mux_idx) { struct hdmi_spec *spec = codec->spec; - struct hdmi_spec_per_pin *per_pin; - int pin_idx, mux_idx; - int curr; - int err; + hda_nid_t nid, end_nid; + int cvt_idx, curr; + struct hdmi_spec_per_cvt *per_cvt; - for (pin_idx = 0; pin_idx < spec->num_pins; pin_idx++) { - per_pin = get_pin(spec, pin_idx); + /* configure all pins, including "no physical connection" ones */ + end_nid = codec->start_nid + codec->num_nodes; + for (nid = codec->start_nid; nid < end_nid; nid++) { + unsigned int wid_caps = get_wcaps(codec, nid); + unsigned int wid_type = get_wcaps_type(wid_caps); + + if (wid_type != AC_WID_PIN) + continue; - if (pin_idx == pin_id) + if (nid == pin_nid) continue; - curr = snd_hda_codec_read(codec, per_pin->pin_nid, 0, + curr = snd_hda_codec_read(codec, nid, 0, AC_VERB_GET_CONNECT_SEL, 0); + if (curr != mux_idx) + continue; - /* Choose another unused converter */ - if (curr == mux_id) { - err = hdmi_choose_cvt(codec, pin_idx, NULL, &mux_idx); - if (err < 0) - return; - snd_printdd("HDMI: choose converter %d for pin %d\n", mux_idx, pin_idx); - snd_hda_codec_write_cache(codec, per_pin->pin_nid, 0, + /* choose an unassigned converter. The conveters in the + * connection list are in the same order as in the codec. + */ + for (cvt_idx = 0; cvt_idx < spec->num_cvts; cvt_idx++) { + per_cvt = get_cvt(spec, cvt_idx); + if (!per_cvt->assigned) { + snd_printdd("choose cvt %d for pin nid %d\n", + cvt_idx, nid); + snd_hda_codec_write_cache(codec, nid, 0, AC_VERB_SET_CONNECT_SEL, - mux_idx); + cvt_idx); + break; + } } } } @@ -1216,7 +1225,7 @@ static int hdmi_pcm_open(struct hda_pcm_stream *hinfo, /* configure unused pins to choose other converters */ if (is_haswell(codec)) - haswell_config_cvts(codec, pin_idx, mux_idx); + haswell_config_cvts(codec, per_pin->pin_nid, mux_idx); snd_hda_spdif_ctls_assign(codec, pin_idx, per_cvt->cvt_nid); diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index bc07d369fac4..bf313bea7085 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2819,6 +2819,15 @@ static void alc269_fixup_hweq(struct hda_codec *codec, alc_write_coef_idx(codec, 0x1e, coef | 0x80); } +static void alc269_fixup_headset_mic(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) + spec->parse_flags |= HDA_PINCFG_HEADSET_MIC; +} + static void alc271_fixup_dmic(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -3439,6 +3448,9 @@ static void alc283_fixup_chromebook(struct hda_codec *codec, /* Set to manual mode */ val = alc_read_coef_idx(codec, 0x06); alc_write_coef_idx(codec, 0x06, val & ~0x000c); + /* Enable Line1 input control by verb */ + val = alc_read_coef_idx(codec, 0x1a); + alc_write_coef_idx(codec, 0x1a, val | (1 << 4)); break; } } @@ -3493,6 +3505,15 @@ static void alc282_fixup_asus_tx300(struct hda_codec *codec, } } +static void alc290_fixup_mono_speakers(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + if (action == HDA_FIXUP_ACT_PRE_PROBE) + /* Remove DAC node 0x03, as it seems to be + giving mono output */ + snd_hda_override_wcaps(codec, 0x03, 0); +} + enum { ALC269_FIXUP_SONY_VAIO, ALC275_FIXUP_SONY_VAIO_GPIO2, @@ -3504,6 +3525,7 @@ enum { ALC271_FIXUP_DMIC, ALC269_FIXUP_PCM_44K, ALC269_FIXUP_STEREO_DMIC, + ALC269_FIXUP_HEADSET_MIC, ALC269_FIXUP_QUANTA_MUTE, ALC269_FIXUP_LIFEBOOK, ALC269_FIXUP_AMIC, @@ -3516,9 +3538,11 @@ enum { ALC269_FIXUP_HP_GPIO_LED, ALC269_FIXUP_INV_DMIC, ALC269_FIXUP_LENOVO_DOCK, + ALC286_FIXUP_SONY_MIC_NO_PRESENCE, ALC269_FIXUP_PINCFG_NO_HP_TO_LINEOUT, ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, ALC269_FIXUP_DELL2_MIC_NO_PRESENCE, + ALC269_FIXUP_DELL3_MIC_NO_PRESENCE, ALC269_FIXUP_HEADSET_MODE, ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC, ALC269_FIXUP_ASUS_X101_FUNC, @@ -3531,6 +3555,8 @@ enum { ALC269VB_FIXUP_ORDISSIMO_EVE2, ALC283_FIXUP_CHROME_BOOK, ALC282_FIXUP_ASUS_TX300, + ALC283_FIXUP_INT_MIC, + ALC290_FIXUP_MONO_SPEAKERS, }; static const struct hda_fixup alc269_fixups[] = { @@ -3599,6 +3625,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc269_fixup_stereo_dmic, }, + [ALC269_FIXUP_HEADSET_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc269_fixup_headset_mic, + }, [ALC269_FIXUP_QUANTA_MUTE] = { .type = HDA_FIXUP_FUNC, .v.func = alc269_fixup_quanta_mute, @@ -3708,6 +3738,15 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC }, + [ALC269_FIXUP_DELL3_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1a, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC + }, [ALC269_FIXUP_HEADSET_MODE] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_mode, @@ -3716,6 +3755,15 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_mode_no_hp_mic, }, + [ALC286_FIXUP_SONY_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MIC + }, [ALC269_FIXUP_ASUS_X101_FUNC] = { .type = HDA_FIXUP_FUNC, .v.func = alc269_fixup_x101_headset_mic, @@ -3790,6 +3838,22 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc282_fixup_asus_tx300, }, + [ALC283_FIXUP_INT_MIC] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + {0x20, AC_VERB_SET_COEF_INDEX, 0x1a}, + {0x20, AC_VERB_SET_PROC_COEF, 0x0011}, + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_LIMIT_INT_MIC_BOOST + }, + [ALC290_FIXUP_MONO_SPEAKERS] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc290_fixup_mono_speakers, + .chained = true, + .chain_id = ALC269_FIXUP_DELL3_MIC_NO_PRESENCE, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -3831,6 +3895,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0608, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0609, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0613, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_MONO_SPEAKERS), SND_PCI_QUIRK(0x1028, 0x15cc, "Dell X5 Precision", ALC269_FIXUP_DELL2_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x15cd, "Dell X5 Precision", ALC269_FIXUP_DELL2_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), @@ -3853,6 +3918,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x8398, "ASUS P1005", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x83ce, "ASUS P1005", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x8516, "ASUS X101CH", ALC269_FIXUP_ASUS_X101), + SND_PCI_QUIRK(0x104d, 0x90b6, "Sony VAIO Pro 13", ALC286_FIXUP_SONY_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x104d, 0x9073, "Sony VAIO", ALC275_FIXUP_SONY_VAIO_GPIO2), SND_PCI_QUIRK(0x104d, 0x907b, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ), SND_PCI_QUIRK(0x104d, 0x9084, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ), @@ -3874,7 +3940,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x2214, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x2215, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x5013, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), - SND_PCI_QUIRK(0x17aa, 0x501a, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x17aa, 0x501a, "Thinkpad", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x5026, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K), @@ -3938,6 +4004,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC269_FIXUP_STEREO_DMIC, .name = "alc269-dmic"}, {.id = ALC271_FIXUP_DMIC, .name = "alc271-dmic"}, {.id = ALC269_FIXUP_INV_DMIC, .name = "inv-dmic"}, + {.id = ALC269_FIXUP_HEADSET_MIC, .name = "headset-mic"}, {.id = ALC269_FIXUP_LENOVO_DOCK, .name = "lenovo-dock"}, {.id = ALC269_FIXUP_HP_GPIO_LED, .name = "hp-gpio-led"}, {.id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, .name = "dell-headset-multi"}, @@ -4555,6 +4622,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x05d8, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05db, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), + SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_ASUS_MODE4), SND_PCI_QUIRK(0x1043, 0x8469, "ASUS mobo", ALC662_FIXUP_NO_JACK_DETECT), SND_PCI_QUIRK(0x105b, 0x0cd6, "Foxconn", ALC662_FIXUP_ASUS_MODE2), SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD), diff --git a/sound/soc/blackfin/bf6xx-i2s.c b/sound/soc/blackfin/bf6xx-i2s.c index c02405cc007d..5810a0603f2f 100644 --- a/sound/soc/blackfin/bf6xx-i2s.c +++ b/sound/soc/blackfin/bf6xx-i2s.c @@ -88,6 +88,7 @@ static int bfin_i2s_hw_params(struct snd_pcm_substream *substream, case SNDRV_PCM_FORMAT_S8: param.spctl |= 0x70; sport->wdsize = 1; + break; case SNDRV_PCM_FORMAT_S16_LE: param.spctl |= 0xf0; sport->wdsize = 2; diff --git a/sound/soc/codecs/88pm860x-codec.c b/sound/soc/codecs/88pm860x-codec.c index 8af04343cc1a..259d1ac4492f 100644 --- a/sound/soc/codecs/88pm860x-codec.c +++ b/sound/soc/codecs/88pm860x-codec.c @@ -349,6 +349,9 @@ static int snd_soc_put_volsw_2r_st(struct snd_kcontrol *kcontrol, val = ucontrol->value.integer.value[0]; val2 = ucontrol->value.integer.value[1]; + if (val >= ARRAY_SIZE(st_table) || val2 >= ARRAY_SIZE(st_table)) + return -EINVAL; + err = snd_soc_update_bits(codec, reg, 0x3f, st_table[val].m); if (err < 0) return err; diff --git a/sound/soc/codecs/ab8500-codec.c b/sound/soc/codecs/ab8500-codec.c index b8ba0adacfce..80555d7551e6 100644 --- a/sound/soc/codecs/ab8500-codec.c +++ b/sound/soc/codecs/ab8500-codec.c @@ -1225,13 +1225,18 @@ static int anc_status_control_put(struct snd_kcontrol *kcontrol, struct ab8500_codec_drvdata *drvdata = dev_get_drvdata(codec->dev); struct device *dev = codec->dev; bool apply_fir, apply_iir; - int req, status; + unsigned int req; + int status; dev_dbg(dev, "%s: Enter.\n", __func__); mutex_lock(&drvdata->anc_lock); req = ucontrol->value.integer.value[0]; + if (req >= ARRAY_SIZE(enum_anc_state)) { + status = -EINVAL; + goto cleanup; + } if (req != ANC_APPLY_FIR_IIR && req != ANC_APPLY_FIR && req != ANC_APPLY_IIR) { dev_err(dev, "%s: ERROR: Unsupported status to set '%s'!\n", diff --git a/sound/soc/codecs/max98095.c b/sound/soc/codecs/max98095.c index 41cdd1642970..8dbcacd44e6a 100644 --- a/sound/soc/codecs/max98095.c +++ b/sound/soc/codecs/max98095.c @@ -1863,7 +1863,7 @@ static int max98095_put_eq_enum(struct snd_kcontrol *kcontrol, struct max98095_pdata *pdata = max98095->pdata; int channel = max98095_get_eq_channel(kcontrol->id.name); struct max98095_cdata *cdata; - int sel = ucontrol->value.integer.value[0]; + unsigned int sel = ucontrol->value.integer.value[0]; struct max98095_eq_cfg *coef_set; int fs, best, best_val, i; int regmask, regsave; @@ -2016,7 +2016,7 @@ static int max98095_put_bq_enum(struct snd_kcontrol *kcontrol, struct max98095_pdata *pdata = max98095->pdata; int channel = max98095_get_bq_channel(codec, kcontrol->id.name); struct max98095_cdata *cdata; - int sel = ucontrol->value.integer.value[0]; + unsigned int sel = ucontrol->value.integer.value[0]; struct max98095_biquad_cfg *coef_set; int fs, best, best_val, i; int regmask, regsave; diff --git a/sound/soc/fsl/imx-sgtl5000.c b/sound/soc/fsl/imx-sgtl5000.c index 46c5b4fdfc52..ca1be1d9dcf0 100644 --- a/sound/soc/fsl/imx-sgtl5000.c +++ b/sound/soc/fsl/imx-sgtl5000.c @@ -62,7 +62,7 @@ static int imx_sgtl5000_probe(struct platform_device *pdev) struct device_node *ssi_np, *codec_np; struct platform_device *ssi_pdev; struct i2c_client *codec_dev; - struct imx_sgtl5000_data *data; + struct imx_sgtl5000_data *data = NULL; int int_port, ext_port; int ret; @@ -128,7 +128,7 @@ static int imx_sgtl5000_probe(struct platform_device *pdev) goto fail; } - data->codec_clk = devm_clk_get(&codec_dev->dev, NULL); + data->codec_clk = clk_get(&codec_dev->dev, NULL); if (IS_ERR(data->codec_clk)) { ret = PTR_ERR(data->codec_clk); goto fail; @@ -172,6 +172,8 @@ static int imx_sgtl5000_probe(struct platform_device *pdev) return 0; fail: + if (data && !IS_ERR(data->codec_clk)) + clk_put(data->codec_clk); if (ssi_np) of_node_put(ssi_np); if (codec_np) @@ -185,6 +187,7 @@ static int imx_sgtl5000_remove(struct platform_device *pdev) struct imx_sgtl5000_data *data = platform_get_drvdata(pdev); snd_soc_unregister_card(&data->card); + clk_put(data->codec_clk); return 0; } diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 4d0561312f3b..1a38be0d0ca8 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1380,7 +1380,6 @@ static int soc_probe_link_dais(struct snd_soc_card *card, int num, int order) return -ENODEV; list_add(&cpu_dai->dapm.list, &card->dapm_list); - snd_soc_dapm_new_dai_widgets(&cpu_dai->dapm, cpu_dai); } if (cpu_dai->driver->probe) { diff --git a/sound/usb/usx2y/usbusx2yaudio.c b/sound/usb/usx2y/usbusx2yaudio.c index 63fb5219f0f8..6234a51625b1 100644 --- a/sound/usb/usx2y/usbusx2yaudio.c +++ b/sound/usb/usx2y/usbusx2yaudio.c @@ -299,19 +299,6 @@ static void usX2Y_error_urb_status(struct usX2Ydev *usX2Y, usX2Y_clients_stop(usX2Y); } -static void usX2Y_error_sequence(struct usX2Ydev *usX2Y, - struct snd_usX2Y_substream *subs, struct urb *urb) -{ - snd_printk(KERN_ERR -"Sequence Error!(hcd_frame=%i ep=%i%s;wait=%i,frame=%i).\n" -"Most probably some urb of usb-frame %i is still missing.\n" -"Cause could be too long delays in usb-hcd interrupt handling.\n", - usb_get_current_frame_number(usX2Y->dev), - subs->endpoint, usb_pipein(urb->pipe) ? "in" : "out", - usX2Y->wait_iso_frame, urb->start_frame, usX2Y->wait_iso_frame); - usX2Y_clients_stop(usX2Y); -} - static void i_usX2Y_urb_complete(struct urb *urb) { struct snd_usX2Y_substream *subs = urb->context; @@ -328,12 +315,9 @@ static void i_usX2Y_urb_complete(struct urb *urb) usX2Y_error_urb_status(usX2Y, subs, urb); return; } - if (likely((urb->start_frame & 0xFFFF) == (usX2Y->wait_iso_frame & 0xFFFF))) - subs->completed_urb = urb; - else { - usX2Y_error_sequence(usX2Y, subs, urb); - return; - } + + subs->completed_urb = urb; + { struct snd_usX2Y_substream *capsubs = usX2Y->subs[SNDRV_PCM_STREAM_CAPTURE], *playbacksubs = usX2Y->subs[SNDRV_PCM_STREAM_PLAYBACK]; diff --git a/sound/usb/usx2y/usx2yhwdeppcm.c b/sound/usb/usx2y/usx2yhwdeppcm.c index f2a1acdc4d83..814d0e887c62 100644 --- a/sound/usb/usx2y/usx2yhwdeppcm.c +++ b/sound/usb/usx2y/usx2yhwdeppcm.c @@ -244,13 +244,8 @@ static void i_usX2Y_usbpcm_urb_complete(struct urb *urb) usX2Y_error_urb_status(usX2Y, subs, urb); return; } - if (likely((urb->start_frame & 0xFFFF) == (usX2Y->wait_iso_frame & 0xFFFF))) - subs->completed_urb = urb; - else { - usX2Y_error_sequence(usX2Y, subs, urb); - return; - } + subs->completed_urb = urb; capsubs = usX2Y->subs[SNDRV_PCM_STREAM_CAPTURE]; capsubs2 = usX2Y->subs[SNDRV_PCM_STREAM_CAPTURE + 2]; playbacksubs = usX2Y->subs[SNDRV_PCM_STREAM_PLAYBACK]; diff --git a/tools/lib/lk/debugfs.c b/tools/lib/lk/debugfs.c index 099e7cd022e4..7c4347962353 100644 --- a/tools/lib/lk/debugfs.c +++ b/tools/lib/lk/debugfs.c @@ -5,7 +5,6 @@ #include <stdbool.h> #include <sys/vfs.h> #include <sys/mount.h> -#include <linux/magic.h> #include <linux/kernel.h> #include "debugfs.h" diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 3a0ff7fb71b6..64c043b7a438 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -770,6 +770,7 @@ check: $(OUTPUT)common-cmds.h install-bin: all $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)' + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' ifndef NO_LIBPERL $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index 9570c2b0f83c..b2519e49424f 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -32,7 +32,7 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc) int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, struct perf_tsc_conversion *tc) { - bool cap_usr_time_zero; + bool cap_user_time_zero; u32 seq; int i = 0; @@ -42,7 +42,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, tc->time_mult = pc->time_mult; tc->time_shift = pc->time_shift; tc->time_zero = pc->time_zero; - cap_usr_time_zero = pc->cap_usr_time_zero; + cap_user_time_zero = pc->cap_user_time_zero; rmb(); if (pc->lock == seq && !(seq & 1)) break; @@ -52,7 +52,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, } } - if (!cap_usr_time_zero) + if (!cap_user_time_zero) return -EOPNOTSUPP; return 0; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 423875c999b2..afe377b2884f 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -321,8 +321,6 @@ found: return perf_event__repipe(tool, event_sw, &sample_sw, machine); } -extern volatile int session_done; - static void sig_handler(int sig __maybe_unused) { session_done = 1; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index c2dff9cb1f2c..9b5f077fee5b 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -101,7 +101,7 @@ static int setup_cpunode_map(void) dir1 = opendir(PATH_SYS_NODE); if (!dir1) - return -1; + return 0; while ((dent1 = readdir(dir1)) != NULL) { if (dent1->d_type != DT_DIR || diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8e50d8d77419..72eae7498c09 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -401,8 +401,6 @@ static int perf_report__setup_sample_type(struct perf_report *rep) return 0; } -extern volatile int session_done; - static void sig_handler(int sig __maybe_unused) { session_done = 1; @@ -568,6 +566,9 @@ static int __cmd_report(struct perf_report *rep) } } + if (session_done()) + return 0; + if (nr_samples == 0) { ui__error("The %s file has no samples!\n", session->filename); return 0; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 7f31a3ded1b6..9c333ff3dfeb 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -553,8 +553,6 @@ static struct perf_tool perf_script = { .ordering_requires_timestamps = true, }; -extern volatile int session_done; - static void sig_handler(int sig __maybe_unused) { session_done = 1; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f686d5ff594e..5098f144b92d 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -457,6 +457,7 @@ static int __run_perf_stat(int argc, const char **argv) perror("failed to prepare workload"); return -1; } + child_pid = evsel_list->workload.pid; } if (group) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index f5aa6375e3e9..71aa3e35406b 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -16,6 +16,23 @@ #include <sys/mman.h> #include <linux/futex.h> +/* For older distros: */ +#ifndef MAP_STACK +# define MAP_STACK 0x20000 +#endif + +#ifndef MADV_HWPOISON +# define MADV_HWPOISON 100 +#endif + +#ifndef MADV_MERGEABLE +# define MADV_MERGEABLE 12 +#endif + +#ifndef MADV_UNMERGEABLE +# define MADV_UNMERGEABLE 13 +#endif + static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, unsigned long arg, u8 arg_idx __maybe_unused, @@ -1038,6 +1055,7 @@ static int trace__replay(struct trace *trace) trace->tool.sample = trace__process_sample; trace->tool.mmap = perf_event__process_mmap; + trace->tool.mmap2 = perf_event__process_mmap2; trace->tool.comm = perf_event__process_comm; trace->tool.exit = perf_event__process_exit; trace->tool.fork = perf_event__process_fork; diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 214e17e97e5c..5f6f9b3271bb 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -87,7 +87,7 @@ CFLAGS += -Wall CFLAGS += -Wextra CFLAGS += -std=gnu99 -EXTLIBS = -lelf -lpthread -lrt -lm +EXTLIBS = -lelf -lpthread -lrt -lm -ldl ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -fstack-protector-all,-fstack-protector-all),y) CFLAGS += -fstack-protector-all @@ -180,6 +180,9 @@ FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DLIBELF_MMAP),y) CFLAGS += -DLIBELF_MMAP endif +ifeq ($(call try-cc,$(SOURCE_ELF_GETPHDRNUM),$(FLAGS_LIBELF),-DHAVE_ELF_GETPHDRNUM),y) + CFLAGS += -DHAVE_ELF_GETPHDRNUM +endif # include ARCH specific config -include $(src-perf)/arch/$(ARCH)/Makefile diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak index 708fb8e9822a..f79305739ecc 100644 --- a/tools/perf/config/feature-tests.mak +++ b/tools/perf/config/feature-tests.mak @@ -61,6 +61,15 @@ int main(void) } endef +define SOURCE_ELF_GETPHDRNUM +#include <libelf.h> +int main(void) +{ + size_t dst; + return elf_getphdrnum(0, &dst); +} +endef + ifndef NO_SLANG define SOURCE_SLANG #include <slang.h> @@ -210,6 +219,7 @@ define SOURCE_LIBAUDIT int main(void) { + printf(\"error message: %s\", audit_errno_to_name(0)); return audit_open(); } endef diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index bfc5a27597d6..7eae5488ecea 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -809,7 +809,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, end = map__rip_2objdump(map, sym->end); offset = line_ip - start; - if (offset < 0 || (u64)line_ip > end) + if ((u64)line_ip < start || (u64)line_ip > end) offset = -1; else parsed_line = tmp2 + 1; diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 3e5f5430a28a..7defd77105d0 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -263,6 +263,21 @@ bool die_is_signed_type(Dwarf_Die *tp_die) } /** + * die_is_func_def - Ensure that this DIE is a subprogram and definition + * @dw_die: a DIE + * + * Ensure that this DIE is a subprogram and NOT a declaration. This + * returns true if @dw_die is a function definition. + **/ +bool die_is_func_def(Dwarf_Die *dw_die) +{ + Dwarf_Attribute attr; + + return (dwarf_tag(dw_die) == DW_TAG_subprogram && + dwarf_attr(dw_die, DW_AT_declaration, &attr) == NULL); +} + +/** * die_get_data_member_location - Get the data-member offset * @mb_die: a DIE of a member of a data structure * @offs: The offset of the member in the data structure @@ -392,6 +407,10 @@ static int __die_search_func_cb(Dwarf_Die *fn_die, void *data) { struct __addr_die_search_param *ad = data; + /* + * Since a declaration entry doesn't has given pc, this always returns + * function definition entry. + */ if (dwarf_tag(fn_die) == DW_TAG_subprogram && dwarf_haspc(fn_die, ad->addr)) { memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die)); @@ -407,7 +426,7 @@ static int __die_search_func_cb(Dwarf_Die *fn_die, void *data) * @die_mem: a buffer for result DIE * * Search a non-inlined function DIE which includes @addr. Stores the - * DIE to @die_mem and returns it if found. Returns NULl if failed. + * DIE to @die_mem and returns it if found. Returns NULL if failed. */ Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, Dwarf_Die *die_mem) @@ -435,15 +454,32 @@ static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data) } /** + * die_find_top_inlinefunc - Search the top inlined function at given address + * @sp_die: a subprogram DIE which including @addr + * @addr: target address + * @die_mem: a buffer for result DIE + * + * Search an inlined function DIE which includes @addr. Stores the + * DIE to @die_mem and returns it if found. Returns NULL if failed. + * Even if several inlined functions are expanded recursively, this + * doesn't trace it down, and returns the topmost one. + */ +Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, + Dwarf_Die *die_mem) +{ + return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem); +} + +/** * die_find_inlinefunc - Search an inlined function at given address - * @cu_die: a CU DIE which including @addr + * @sp_die: a subprogram DIE which including @addr * @addr: target address * @die_mem: a buffer for result DIE * * Search an inlined function DIE which includes @addr. Stores the - * DIE to @die_mem and returns it if found. Returns NULl if failed. + * DIE to @die_mem and returns it if found. Returns NULL if failed. * If several inlined functions are expanded recursively, this trace - * it and returns deepest one. + * it down and returns deepest one. */ Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, Dwarf_Die *die_mem) diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 6ce1717784b7..b4fe90c6cb2d 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -38,6 +38,9 @@ extern int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr, extern int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, int (*callback)(Dwarf_Die *, void *), void *data); +/* Ensure that this DIE is a subprogram and definition (not declaration) */ +extern bool die_is_func_def(Dwarf_Die *dw_die); + /* Compare diename and tname */ extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname); @@ -76,7 +79,11 @@ extern Dwarf_Die *die_find_child(Dwarf_Die *rt_die, extern Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, Dwarf_Die *die_mem); -/* Search an inlined function including given address */ +/* Search the top inlined function including given address */ +extern Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, + Dwarf_Die *die_mem); + +/* Search the deepest inlined function including given address */ extern Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, Dwarf_Die *die_mem); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 26441d0e571b..c3e5a3b817ab 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -199,9 +199,11 @@ static int write_buildid(char *name, size_t name_len, u8 *build_id, return write_padded(fd, name, name_len + 1, len); } -static int __dsos__write_buildid_table(struct list_head *head, pid_t pid, - u16 misc, int fd) +static int __dsos__write_buildid_table(struct list_head *head, + struct machine *machine, + pid_t pid, u16 misc, int fd) { + char nm[PATH_MAX]; struct dso *pos; dsos__for_each_with_build_id(pos, head) { @@ -215,6 +217,10 @@ static int __dsos__write_buildid_table(struct list_head *head, pid_t pid, if (is_vdso_map(pos->short_name)) { name = (char *) VDSO__MAP_NAME; name_len = sizeof(VDSO__MAP_NAME) + 1; + } else if (dso__is_kcore(pos)) { + machine__mmap_name(machine, nm, sizeof(nm)); + name = nm; + name_len = strlen(nm) + 1; } else { name = pos->long_name; name_len = pos->long_name_len + 1; @@ -240,10 +246,10 @@ static int machine__write_buildid_table(struct machine *machine, int fd) umisc = PERF_RECORD_MISC_GUEST_USER; } - err = __dsos__write_buildid_table(&machine->kernel_dsos, machine->pid, - kmisc, fd); + err = __dsos__write_buildid_table(&machine->kernel_dsos, machine, + machine->pid, kmisc, fd); if (err == 0) - err = __dsos__write_buildid_table(&machine->user_dsos, + err = __dsos__write_buildid_table(&machine->user_dsos, machine, machine->pid, umisc, fd); return err; } @@ -375,23 +381,31 @@ out_free: return err; } -static int dso__cache_build_id(struct dso *dso, const char *debugdir) +static int dso__cache_build_id(struct dso *dso, struct machine *machine, + const char *debugdir) { bool is_kallsyms = dso->kernel && dso->long_name[0] != '/'; bool is_vdso = is_vdso_map(dso->short_name); + char *name = dso->long_name; + char nm[PATH_MAX]; - return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), - dso->long_name, debugdir, - is_kallsyms, is_vdso); + if (dso__is_kcore(dso)) { + is_kallsyms = true; + machine__mmap_name(machine, nm, sizeof(nm)); + name = nm; + } + return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name, + debugdir, is_kallsyms, is_vdso); } -static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir) +static int __dsos__cache_build_ids(struct list_head *head, + struct machine *machine, const char *debugdir) { struct dso *pos; int err = 0; dsos__for_each_with_build_id(pos, head) - if (dso__cache_build_id(pos, debugdir)) + if (dso__cache_build_id(pos, machine, debugdir)) err = -1; return err; @@ -399,8 +413,9 @@ static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir) static int machine__cache_build_ids(struct machine *machine, const char *debugdir) { - int ret = __dsos__cache_build_ids(&machine->kernel_dsos, debugdir); - ret |= __dsos__cache_build_ids(&machine->user_dsos, debugdir); + int ret = __dsos__cache_build_ids(&machine->kernel_dsos, machine, + debugdir); + ret |= __dsos__cache_build_ids(&machine->user_dsos, machine, debugdir); return ret; } @@ -2753,6 +2768,18 @@ int perf_session__read_header(struct perf_session *session) if (perf_file_header__read(&f_header, header, fd) < 0) return -EINVAL; + /* + * Sanity check that perf.data was written cleanly; data size is + * initialized to 0 and updated only if the on_exit function is run. + * If data size is still 0 then the file contains only partial + * information. Just warn user and process it as much as it can. + */ + if (f_header.data.size == 0) { + pr_warning("WARNING: The %s file's data size field is 0 which is unexpected.\n" + "Was the 'perf record' command properly terminated?\n", + session->filename); + } + nr_attrs = f_header.attrs.size / f_header.attr_size; lseek(fd, f_header.attrs.offset, SEEK_SET); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 46a0d35a05e1..9ff6cf3e9a99 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -611,6 +611,8 @@ void hists__collapse_resort(struct hists *hists) next = rb_first(root); while (next) { + if (session_done()) + break; n = rb_entry(next, struct hist_entry, rb_node_in); next = rb_next(&n->rb_node_in); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 933d14f287ca..6188d2876a71 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -792,7 +792,7 @@ static int machine__create_modules(struct machine *machine) modules = path; } - if (symbol__restricted_filename(path, "/proc/modules")) + if (symbol__restricted_filename(modules, "/proc/modules")) return -1; file = fopen(modules, "r"); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index be0329394d56..c09e0a9fdf4c 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -118,7 +118,6 @@ static const Dwfl_Callbacks offline_callbacks = { static int debuginfo__init_offline_dwarf(struct debuginfo *self, const char *path) { - Dwfl_Module *mod; int fd; fd = open(path, O_RDONLY); @@ -129,11 +128,11 @@ static int debuginfo__init_offline_dwarf(struct debuginfo *self, if (!self->dwfl) goto error; - mod = dwfl_report_offline(self->dwfl, "", "", fd); - if (!mod) + self->mod = dwfl_report_offline(self->dwfl, "", "", fd); + if (!self->mod) goto error; - self->dbg = dwfl_module_getdwarf(mod, &self->bias); + self->dbg = dwfl_module_getdwarf(self->mod, &self->bias); if (!self->dbg) goto error; @@ -676,37 +675,42 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) } /* Convert subprogram DIE to trace point */ -static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr, - bool retprobe, struct probe_trace_point *tp) +static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod, + Dwarf_Addr paddr, bool retprobe, + struct probe_trace_point *tp) { Dwarf_Addr eaddr, highaddr; - const char *name; - - /* Copy the name of probe point */ - name = dwarf_diename(sp_die); - if (name) { - if (dwarf_entrypc(sp_die, &eaddr) != 0) { - pr_warning("Failed to get entry address of %s\n", - dwarf_diename(sp_die)); - return -ENOENT; - } - if (dwarf_highpc(sp_die, &highaddr) != 0) { - pr_warning("Failed to get end address of %s\n", - dwarf_diename(sp_die)); - return -ENOENT; - } - if (paddr > highaddr) { - pr_warning("Offset specified is greater than size of %s\n", - dwarf_diename(sp_die)); - return -EINVAL; - } - tp->symbol = strdup(name); - if (tp->symbol == NULL) - return -ENOMEM; - tp->offset = (unsigned long)(paddr - eaddr); - } else - /* This function has no name. */ - tp->offset = (unsigned long)paddr; + GElf_Sym sym; + const char *symbol; + + /* Verify the address is correct */ + if (dwarf_entrypc(sp_die, &eaddr) != 0) { + pr_warning("Failed to get entry address of %s\n", + dwarf_diename(sp_die)); + return -ENOENT; + } + if (dwarf_highpc(sp_die, &highaddr) != 0) { + pr_warning("Failed to get end address of %s\n", + dwarf_diename(sp_die)); + return -ENOENT; + } + if (paddr > highaddr) { + pr_warning("Offset specified is greater than size of %s\n", + dwarf_diename(sp_die)); + return -EINVAL; + } + + /* Get an appropriate symbol from symtab */ + symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL); + if (!symbol) { + pr_warning("Failed to find symbol at 0x%lx\n", + (unsigned long)paddr); + return -ENOENT; + } + tp->offset = (unsigned long)(paddr - sym.st_value); + tp->symbol = strdup(symbol); + if (!tp->symbol) + return -ENOMEM; /* Return probe must be on the head of a subprogram */ if (retprobe) { @@ -734,7 +738,7 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) } /* If not a real subprogram, find a real one */ - if (dwarf_tag(sc_die) != DW_TAG_subprogram) { + if (!die_is_func_def(sc_die)) { if (!die_find_realfunc(&pf->cu_die, pf->addr, &pf->sp_die)) { pr_warning("Failed to find probe point in any " "functions.\n"); @@ -980,12 +984,10 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) struct dwarf_callback_param *param = data; struct probe_finder *pf = param->data; struct perf_probe_point *pp = &pf->pev->point; - Dwarf_Attribute attr; /* Check tag and diename */ - if (dwarf_tag(sp_die) != DW_TAG_subprogram || - !die_compare_name(sp_die, pp->function) || - dwarf_attr(sp_die, DW_AT_declaration, &attr)) + if (!die_is_func_def(sp_die) || + !die_compare_name(sp_die, pp->function)) return DWARF_CB_OK; /* Check declared file */ @@ -1151,7 +1153,7 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf) tev = &tf->tevs[tf->ntevs++]; /* Trace point should be converted from subprogram DIE */ - ret = convert_to_trace_point(&pf->sp_die, pf->addr, + ret = convert_to_trace_point(&pf->sp_die, tf->mod, pf->addr, pf->pev->point.retprobe, &tev->point); if (ret < 0) return ret; @@ -1183,7 +1185,7 @@ int debuginfo__find_trace_events(struct debuginfo *self, { struct trace_event_finder tf = { .pf = {.pev = pev, .callback = add_probe_trace_event}, - .max_tevs = max_tevs}; + .mod = self->mod, .max_tevs = max_tevs}; int ret; /* Allocate result tevs array */ @@ -1252,7 +1254,7 @@ static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf) vl = &af->vls[af->nvls++]; /* Trace point should be converted from subprogram DIE */ - ret = convert_to_trace_point(&pf->sp_die, pf->addr, + ret = convert_to_trace_point(&pf->sp_die, af->mod, pf->addr, pf->pev->point.retprobe, &vl->point); if (ret < 0) return ret; @@ -1291,6 +1293,7 @@ int debuginfo__find_available_vars_at(struct debuginfo *self, { struct available_var_finder af = { .pf = {.pev = pev, .callback = add_available_vars}, + .mod = self->mod, .max_vls = max_vls, .externs = externs}; int ret; @@ -1324,8 +1327,8 @@ int debuginfo__find_probe_point(struct debuginfo *self, unsigned long addr, struct perf_probe_point *ppt) { Dwarf_Die cudie, spdie, indie; - Dwarf_Addr _addr, baseaddr; - const char *fname = NULL, *func = NULL, *tmp; + Dwarf_Addr _addr = 0, baseaddr = 0; + const char *fname = NULL, *func = NULL, *basefunc = NULL, *tmp; int baseline = 0, lineno = 0, ret = 0; /* Adjust address with bias */ @@ -1346,27 +1349,36 @@ int debuginfo__find_probe_point(struct debuginfo *self, unsigned long addr, /* Find a corresponding function (name, baseline and baseaddr) */ if (die_find_realfunc(&cudie, (Dwarf_Addr)addr, &spdie)) { /* Get function entry information */ - tmp = dwarf_diename(&spdie); - if (!tmp || + func = basefunc = dwarf_diename(&spdie); + if (!func || dwarf_entrypc(&spdie, &baseaddr) != 0 || - dwarf_decl_line(&spdie, &baseline) != 0) + dwarf_decl_line(&spdie, &baseline) != 0) { + lineno = 0; goto post; - func = tmp; + } - if (addr == (unsigned long)baseaddr) + if (addr == (unsigned long)baseaddr) { /* Function entry - Relative line number is 0 */ lineno = baseline; - else if (die_find_inlinefunc(&spdie, (Dwarf_Addr)addr, - &indie)) { + fname = dwarf_decl_file(&spdie); + goto post; + } + + /* Track down the inline functions step by step */ + while (die_find_top_inlinefunc(&spdie, (Dwarf_Addr)addr, + &indie)) { + /* There is an inline function */ if (dwarf_entrypc(&indie, &_addr) == 0 && - _addr == addr) + _addr == addr) { /* * addr is at an inline function entry. * In this case, lineno should be the call-site - * line number. + * line number. (overwrite lineinfo) */ lineno = die_get_call_lineno(&indie); - else { + fname = die_get_call_file(&indie); + break; + } else { /* * addr is in an inline function body. * Since lineno points one of the lines @@ -1374,19 +1386,27 @@ int debuginfo__find_probe_point(struct debuginfo *self, unsigned long addr, * be the entry line of the inline function. */ tmp = dwarf_diename(&indie); - if (tmp && - dwarf_decl_line(&spdie, &baseline) == 0) - func = tmp; + if (!tmp || + dwarf_decl_line(&indie, &baseline) != 0) + break; + func = tmp; + spdie = indie; } } + /* Verify the lineno and baseline are in a same file */ + tmp = dwarf_decl_file(&spdie); + if (!tmp || strcmp(tmp, fname) != 0) + lineno = 0; } post: /* Make a relative line number or an offset */ if (lineno) ppt->line = lineno - baseline; - else if (func) + else if (basefunc) { ppt->offset = addr - (unsigned long)baseaddr; + func = basefunc; + } /* Duplicate strings */ if (func) { @@ -1474,7 +1494,7 @@ static int line_range_inline_cb(Dwarf_Die *in_die, void *data) return 0; } -/* Search function from function name */ +/* Search function definition from function name */ static int line_range_search_cb(Dwarf_Die *sp_die, void *data) { struct dwarf_callback_param *param = data; @@ -1485,7 +1505,7 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data) if (lr->file && strtailcmp(lr->file, dwarf_decl_file(sp_die))) return DWARF_CB_OK; - if (dwarf_tag(sp_die) == DW_TAG_subprogram && + if (die_is_func_def(sp_die) && die_compare_name(sp_die, lr->function)) { lf->fname = dwarf_decl_file(sp_die); dwarf_decl_line(sp_die, &lr->offset); diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 17e94d0c36f9..3b7d63018960 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -23,6 +23,7 @@ static inline int is_c_varname(const char *name) /* debug information structure */ struct debuginfo { Dwarf *dbg; + Dwfl_Module *mod; Dwfl *dwfl; Dwarf_Addr bias; }; @@ -77,6 +78,7 @@ struct probe_finder { struct trace_event_finder { struct probe_finder pf; + Dwfl_Module *mod; /* For solving symbols */ struct probe_trace_event *tevs; /* Found trace events */ int ntevs; /* Number of trace events */ int max_tevs; /* Max number of trace events */ @@ -84,6 +86,7 @@ struct trace_event_finder { struct available_var_finder { struct probe_finder pf; + Dwfl_Module *mod; /* For solving symbols */ struct variable_list *vls; /* Found variable lists */ int nvls; /* Number of variable lists */ int max_vls; /* Max no. of variable lists */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 51f5edf2a6d0..568b750c01f6 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -256,6 +256,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->sample = process_event_sample_stub; if (tool->mmap == NULL) tool->mmap = process_event_stub; + if (tool->mmap2 == NULL) + tool->mmap2 = process_event_stub; if (tool->comm == NULL) tool->comm = process_event_stub; if (tool->fork == NULL) @@ -531,6 +533,9 @@ static int flush_sample_queue(struct perf_session *s, return 0; list_for_each_entry_safe(iter, tmp, head, list) { + if (session_done()) + return 0; + if (iter->timestamp > limit) break; @@ -1160,7 +1165,6 @@ static void perf_session__warn_about_errors(const struct perf_session *session, } } -#define session_done() (*(volatile int *)(&session_done)) volatile int session_done; static int __perf_session__process_pipe_events(struct perf_session *self, @@ -1308,7 +1312,7 @@ int __perf_session__process_events(struct perf_session *session, file_offset = page_offset; head = data_offset - page_offset; - if (data_offset + data_size < file_size) + if (data_size && (data_offset + data_size < file_size)) file_size = data_offset + data_size; progress_next = file_size / 16; @@ -1372,10 +1376,13 @@ more: "Processing events..."); } + err = 0; + if (session_done()) + goto out_err; + if (file_pos < file_size) goto more; - err = 0; /* do the final flush for ordered samples */ session->ordered_samples.next_flush = ULLONG_MAX; err = flush_sample_queue(session, tool); diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 3aa75fb2225f..04bf7373a7e5 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -124,4 +124,8 @@ int __perf_session__set_tracepoints_handlers(struct perf_session *session, #define perf_session__set_tracepoints_handlers(session, array) \ __perf_session__set_tracepoints_handlers(session, array, ARRAY_SIZE(array)) + +extern volatile int session_done; + +#define session_done() (*(volatile int *)(&session_done)) #endif /* __PERF_SESSION_H */ diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index a7b9ab557380..a9c829be5216 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -8,6 +8,22 @@ #include "symbol.h" #include "debug.h" +#ifndef HAVE_ELF_GETPHDRNUM +static int elf_getphdrnum(Elf *elf, size_t *dst) +{ + GElf_Ehdr gehdr; + GElf_Ehdr *ehdr; + + ehdr = gelf_getehdr(elf, &gehdr); + if (!ehdr) + return -1; + + *dst = ehdr->e_phnum; + + return 0; +} +#endif + #ifndef NT_GNU_BUILD_ID #define NT_GNU_BUILD_ID 3 #endif diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index fe7a27d67d2b..e9e1c03f927d 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -186,7 +186,7 @@ void parse_proc_kallsyms(struct pevent *pevent, char *next = NULL; char *addr_str; char *mod; - char *fmt; + char *fmt = NULL; line = strtok_r(file, "\n", &next); while (line) { diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 779262f59e25..fbe1a48bd629 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -27,3 +27,6 @@ config HAVE_KVM_MSI config HAVE_KVM_CPU_RELAX_INTERCEPT bool + +config KVM_VFIO + bool diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 8a39dda7a325..8631d9c14320 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -56,7 +56,6 @@ void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu) static void async_pf_execute(struct work_struct *work) { - struct page *page = NULL; struct kvm_async_pf *apf = container_of(work, struct kvm_async_pf, work); struct mm_struct *mm = apf->mm; @@ -68,14 +67,12 @@ static void async_pf_execute(struct work_struct *work) use_mm(mm); down_read(&mm->mmap_sem); - get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL); + get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL); up_read(&mm->mmap_sem); unuse_mm(mm); spin_lock(&vcpu->async_pf.lock); list_add_tail(&apf->link, &vcpu->async_pf.done); - apf->page = page; - apf->done = true; spin_unlock(&vcpu->async_pf.lock); /* @@ -83,7 +80,7 @@ static void async_pf_execute(struct work_struct *work) * this point */ - trace_kvm_async_pf_completed(addr, page, gva); + trace_kvm_async_pf_completed(addr, gva); if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); @@ -99,9 +96,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) struct kvm_async_pf *work = list_entry(vcpu->async_pf.queue.next, typeof(*work), queue); - cancel_work_sync(&work->work); list_del(&work->queue); - if (!work->done) { /* work was canceled */ + if (cancel_work_sync(&work->work)) { mmdrop(work->mm); kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ kmem_cache_free(async_pf_cache, work); @@ -114,8 +110,6 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) list_entry(vcpu->async_pf.done.next, typeof(*work), link); list_del(&work->link); - if (!is_error_page(work->page)) - kvm_release_page_clean(work->page); kmem_cache_free(async_pf_cache, work); } spin_unlock(&vcpu->async_pf.lock); @@ -135,14 +129,11 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) list_del(&work->link); spin_unlock(&vcpu->async_pf.lock); - if (work->page) - kvm_arch_async_page_ready(vcpu, work); + kvm_arch_async_page_ready(vcpu, work); kvm_arch_async_page_present(vcpu, work); list_del(&work->queue); vcpu->async_pf.queued--; - if (!is_error_page(work->page)) - kvm_release_page_clean(work->page); kmem_cache_free(async_pf_cache, work); } } @@ -165,8 +156,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, if (!work) return 0; - work->page = NULL; - work->done = false; + work->wakeup_all = false; work->vcpu = vcpu; work->gva = gva; work->addr = gfn_to_hva(vcpu->kvm, gfn); @@ -206,7 +196,7 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu) if (!work) return -ENOMEM; - work->page = KVM_ERR_PTR_BAD_PAGE; + work->wakeup_all = true; INIT_LIST_HEAD(&work->queue); /* for list_del to work */ spin_lock(&vcpu->async_pf.lock); diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 72a130bc448a..0df7d4b34dfe 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -79,7 +79,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) flags = IOMMU_READ; if (!(slot->flags & KVM_MEM_READONLY)) flags |= IOMMU_WRITE; - if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY) + if (!kvm->arch.iommu_noncoherent) flags |= IOMMU_CACHE; @@ -103,6 +103,10 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) while ((gfn << PAGE_SHIFT) & (page_size - 1)) page_size >>= 1; + /* Make sure hva is aligned to the page size we want to map */ + while (__gfn_to_hva_memslot(slot, gfn) & (page_size - 1)) + page_size >>= 1; + /* * Pin all pages we are about to map in memory. This is * important because we unmap and unpin in 4kb steps later. @@ -140,6 +144,9 @@ static int kvm_iommu_map_memslots(struct kvm *kvm) struct kvm_memslots *slots; struct kvm_memory_slot *memslot; + if (kvm->arch.iommu_noncoherent) + kvm_arch_register_noncoherent_dma(kvm); + idx = srcu_read_lock(&kvm->srcu); slots = kvm_memslots(kvm); @@ -158,7 +165,8 @@ int kvm_assign_device(struct kvm *kvm, { struct pci_dev *pdev = NULL; struct iommu_domain *domain = kvm->arch.iommu_domain; - int r, last_flags; + int r; + bool noncoherent; /* check if iommu exists and in use */ if (!domain) @@ -174,15 +182,13 @@ int kvm_assign_device(struct kvm *kvm, return r; } - last_flags = kvm->arch.iommu_flags; - if (iommu_domain_has_cap(kvm->arch.iommu_domain, - IOMMU_CAP_CACHE_COHERENCY)) - kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY; + noncoherent = !iommu_domain_has_cap(kvm->arch.iommu_domain, + IOMMU_CAP_CACHE_COHERENCY); /* Check if need to update IOMMU page table for guest memory */ - if ((last_flags ^ kvm->arch.iommu_flags) == - KVM_IOMMU_CACHE_COHERENCY) { + if (noncoherent != kvm->arch.iommu_noncoherent) { kvm_iommu_unmap_memslots(kvm); + kvm->arch.iommu_noncoherent = noncoherent; r = kvm_iommu_map_memslots(kvm); if (r) goto out_unmap; @@ -190,11 +196,7 @@ int kvm_assign_device(struct kvm *kvm, pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED; - printk(KERN_DEBUG "assign device %x:%x:%x.%x\n", - assigned_dev->host_segnr, - assigned_dev->host_busnr, - PCI_SLOT(assigned_dev->host_devfn), - PCI_FUNC(assigned_dev->host_devfn)); + dev_info(&pdev->dev, "kvm assign device\n"); return 0; out_unmap: @@ -220,11 +222,7 @@ int kvm_deassign_device(struct kvm *kvm, pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; - printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n", - assigned_dev->host_segnr, - assigned_dev->host_busnr, - PCI_SLOT(assigned_dev->host_devfn), - PCI_FUNC(assigned_dev->host_devfn)); + dev_info(&pdev->dev, "kvm deassign device\n"); return 0; } @@ -336,6 +334,9 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm) srcu_read_unlock(&kvm->srcu, idx); + if (kvm->arch.iommu_noncoherent) + kvm_arch_unregister_noncoherent_dma(kvm); + return 0; } @@ -350,6 +351,7 @@ int kvm_iommu_unmap_guest(struct kvm *kvm) mutex_lock(&kvm->slots_lock); kvm_iommu_unmap_memslots(kvm); kvm->arch.iommu_domain = NULL; + kvm->arch.iommu_noncoherent = false; mutex_unlock(&kvm->slots_lock); iommu_domain_free(domain); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 979bff485fb0..10015d6e4d08 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -70,7 +70,8 @@ MODULE_LICENSE("GPL"); * kvm->lock --> kvm->slots_lock --> kvm->irq_lock */ -DEFINE_RAW_SPINLOCK(kvm_lock); +DEFINE_SPINLOCK(kvm_lock); +static DEFINE_RAW_SPINLOCK(kvm_count_lock); LIST_HEAD(vm_list); static cpumask_var_t cpus_hardware_enabled; @@ -186,6 +187,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) ++kvm->stat.remote_tlb_flush; cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); } +EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs); void kvm_reload_remote_mmus(struct kvm *kvm) { @@ -490,9 +492,9 @@ static struct kvm *kvm_create_vm(unsigned long type) if (r) goto out_err; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); return kvm; @@ -540,13 +542,13 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) /* * Free any memory in @free but not in @dont. */ -static void kvm_free_physmem_slot(struct kvm_memory_slot *free, +static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { if (!dont || free->dirty_bitmap != dont->dirty_bitmap) kvm_destroy_dirty_bitmap(free); - kvm_arch_free_memslot(free, dont); + kvm_arch_free_memslot(kvm, free, dont); free->npages = 0; } @@ -557,7 +559,7 @@ void kvm_free_physmem(struct kvm *kvm) struct kvm_memory_slot *memslot; kvm_for_each_memslot(memslot, slots) - kvm_free_physmem_slot(memslot, NULL); + kvm_free_physmem_slot(kvm, memslot, NULL); kfree(kvm->memslots); } @@ -581,9 +583,9 @@ static void kvm_destroy_vm(struct kvm *kvm) struct mm_struct *mm = kvm->mm; kvm_arch_sync_events(kvm); - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_del(&kvm->vm_list); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) kvm_io_bus_destroy(kvm->buses[i]); @@ -821,7 +823,7 @@ int __kvm_set_memory_region(struct kvm *kvm, if (change == KVM_MR_CREATE) { new.userspace_addr = mem->userspace_addr; - if (kvm_arch_create_memslot(&new, npages)) + if (kvm_arch_create_memslot(kvm, &new, npages)) goto out_free; } @@ -872,6 +874,19 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out_free; } + /* actual memory is freed via old in kvm_free_physmem_slot below */ + if (change == KVM_MR_DELETE) { + new.dirty_bitmap = NULL; + memset(&new.arch, 0, sizeof(new.arch)); + } + + old_memslots = install_new_memslots(kvm, slots, &new); + + kvm_arch_commit_memory_region(kvm, mem, &old, change); + + kvm_free_physmem_slot(kvm, &old, &new); + kfree(old_memslots); + /* * IOMMU mapping: New slots need to be mapped. Old slots need to be * un-mapped and re-mapped if their base changes. Since base change @@ -883,29 +898,15 @@ int __kvm_set_memory_region(struct kvm *kvm, */ if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { r = kvm_iommu_map_pages(kvm, &new); - if (r) - goto out_slots; - } - - /* actual memory is freed via old in kvm_free_physmem_slot below */ - if (change == KVM_MR_DELETE) { - new.dirty_bitmap = NULL; - memset(&new.arch, 0, sizeof(new.arch)); + return r; } - old_memslots = install_new_memslots(kvm, slots, &new); - - kvm_arch_commit_memory_region(kvm, mem, &old, change); - - kvm_free_physmem_slot(&old, &new); - kfree(old_memslots); - return 0; out_slots: kfree(slots); out_free: - kvm_free_physmem_slot(&new, &old); + kvm_free_physmem_slot(kvm, &new, &old); out: return r; } @@ -964,6 +965,7 @@ int kvm_get_dirty_log(struct kvm *kvm, out: return r; } +EXPORT_SYMBOL_GPL(kvm_get_dirty_log); bool kvm_largepages_enabled(void) { @@ -1064,10 +1066,12 @@ EXPORT_SYMBOL_GPL(gfn_to_hva); unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) { struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); - if (writable) + unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false); + + if (!kvm_is_error_hva(hva) && writable) *writable = !memslot_is_readonly(slot); - return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false); + return hva; } static int kvm_read_hva(void *data, void __user *hva, int len) @@ -1652,6 +1656,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) memslot = gfn_to_memslot(kvm, gfn); mark_page_dirty_in_slot(kvm, memslot, gfn); } +EXPORT_SYMBOL_GPL(mark_page_dirty); /* * The vCPU has executed a HLT instruction with in-kernel mode enabled. @@ -1677,6 +1682,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) finish_wait(&vcpu->wq, &wait); } +EXPORT_SYMBOL_GPL(kvm_vcpu_block); #ifndef CONFIG_S390 /* @@ -2269,6 +2275,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm, ops = &kvm_xics_ops; break; #endif +#ifdef CONFIG_KVM_VFIO + case KVM_DEV_TYPE_VFIO: + ops = &kvm_vfio_ops; + break; +#endif default: return -ENODEV; } @@ -2517,44 +2528,12 @@ out: } #endif -static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *page[1]; - unsigned long addr; - int npages; - gfn_t gfn = vmf->pgoff; - struct kvm *kvm = vma->vm_file->private_data; - - addr = gfn_to_hva(kvm, gfn); - if (kvm_is_error_hva(addr)) - return VM_FAULT_SIGBUS; - - npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page, - NULL); - if (unlikely(npages != 1)) - return VM_FAULT_SIGBUS; - - vmf->page = page[0]; - return 0; -} - -static const struct vm_operations_struct kvm_vm_vm_ops = { - .fault = kvm_vm_fault, -}; - -static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) -{ - vma->vm_ops = &kvm_vm_vm_ops; - return 0; -} - static struct file_operations kvm_vm_fops = { .release = kvm_vm_release, .unlocked_ioctl = kvm_vm_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = kvm_vm_compat_ioctl, #endif - .mmap = kvm_vm_mmap, .llseek = noop_llseek, }; @@ -2681,11 +2660,12 @@ static void hardware_enable_nolock(void *junk) } } -static void hardware_enable(void *junk) +static void hardware_enable(void) { - raw_spin_lock(&kvm_lock); - hardware_enable_nolock(junk); - raw_spin_unlock(&kvm_lock); + raw_spin_lock(&kvm_count_lock); + if (kvm_usage_count) + hardware_enable_nolock(NULL); + raw_spin_unlock(&kvm_count_lock); } static void hardware_disable_nolock(void *junk) @@ -2698,11 +2678,12 @@ static void hardware_disable_nolock(void *junk) kvm_arch_hardware_disable(NULL); } -static void hardware_disable(void *junk) +static void hardware_disable(void) { - raw_spin_lock(&kvm_lock); - hardware_disable_nolock(junk); - raw_spin_unlock(&kvm_lock); + raw_spin_lock(&kvm_count_lock); + if (kvm_usage_count) + hardware_disable_nolock(NULL); + raw_spin_unlock(&kvm_count_lock); } static void hardware_disable_all_nolock(void) @@ -2716,16 +2697,16 @@ static void hardware_disable_all_nolock(void) static void hardware_disable_all(void) { - raw_spin_lock(&kvm_lock); + raw_spin_lock(&kvm_count_lock); hardware_disable_all_nolock(); - raw_spin_unlock(&kvm_lock); + raw_spin_unlock(&kvm_count_lock); } static int hardware_enable_all(void) { int r = 0; - raw_spin_lock(&kvm_lock); + raw_spin_lock(&kvm_count_lock); kvm_usage_count++; if (kvm_usage_count == 1) { @@ -2738,7 +2719,7 @@ static int hardware_enable_all(void) } } - raw_spin_unlock(&kvm_lock); + raw_spin_unlock(&kvm_count_lock); return r; } @@ -2748,20 +2729,17 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, { int cpu = (long)v; - if (!kvm_usage_count) - return NOTIFY_OK; - val &= ~CPU_TASKS_FROZEN; switch (val) { case CPU_DYING: printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", cpu); - hardware_disable(NULL); + hardware_disable(); break; case CPU_STARTING: printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", cpu); - hardware_enable(NULL); + hardware_enable(); break; } return NOTIFY_OK; @@ -3054,10 +3032,10 @@ static int vm_stat_get(void *_offset, u64 *val) struct kvm *kvm; *val = 0; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) *val += *(u32 *)((void *)kvm + offset); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); return 0; } @@ -3071,12 +3049,12 @@ static int vcpu_stat_get(void *_offset, u64 *val) int i; *val = 0; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_for_each_vcpu(i, vcpu, kvm) *val += *(u32 *)((void *)vcpu + offset); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); return 0; } @@ -3131,7 +3109,7 @@ static int kvm_suspend(void) static void kvm_resume(void) { if (kvm_usage_count) { - WARN_ON(raw_spin_is_locked(&kvm_lock)); + WARN_ON(raw_spin_is_locked(&kvm_count_lock)); hardware_enable_nolock(NULL); } } diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c new file mode 100644 index 000000000000..ca4260e35037 --- /dev/null +++ b/virt/kvm/vfio.c @@ -0,0 +1,264 @@ +/* + * VFIO-KVM bridge pseudo device + * + * Copyright (C) 2013 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson <alex.williamson@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/errno.h> +#include <linux/file.h> +#include <linux/kvm_host.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/vfio.h> + +struct kvm_vfio_group { + struct list_head node; + struct vfio_group *vfio_group; +}; + +struct kvm_vfio { + struct list_head group_list; + struct mutex lock; + bool noncoherent; +}; + +static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep) +{ + struct vfio_group *vfio_group; + struct vfio_group *(*fn)(struct file *); + + fn = symbol_get(vfio_group_get_external_user); + if (!fn) + return ERR_PTR(-EINVAL); + + vfio_group = fn(filep); + + symbol_put(vfio_group_get_external_user); + + return vfio_group; +} + +static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group) +{ + void (*fn)(struct vfio_group *); + + fn = symbol_get(vfio_group_put_external_user); + if (!fn) + return; + + fn(vfio_group); + + symbol_put(vfio_group_put_external_user); +} + +/* + * Groups can use the same or different IOMMU domains. If the same then + * adding a new group may change the coherency of groups we've previously + * been told about. We don't want to care about any of that so we retest + * each group and bail as soon as we find one that's noncoherent. This + * means we only ever [un]register_noncoherent_dma once for the whole device. + */ +static void kvm_vfio_update_coherency(struct kvm_device *dev) +{ + struct kvm_vfio *kv = dev->private; + bool noncoherent = false; + struct kvm_vfio_group *kvg; + + mutex_lock(&kv->lock); + + list_for_each_entry(kvg, &kv->group_list, node) { + /* + * TODO: We need an interface to check the coherency of + * the IOMMU domain this group is using. For now, assume + * it's always noncoherent. + */ + noncoherent = true; + break; + } + + if (noncoherent != kv->noncoherent) { + kv->noncoherent = noncoherent; + + if (kv->noncoherent) + kvm_arch_register_noncoherent_dma(dev->kvm); + else + kvm_arch_unregister_noncoherent_dma(dev->kvm); + } + + mutex_unlock(&kv->lock); +} + +static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) +{ + struct kvm_vfio *kv = dev->private; + struct vfio_group *vfio_group; + struct kvm_vfio_group *kvg; + void __user *argp = (void __user *)arg; + struct fd f; + int32_t fd; + int ret; + + switch (attr) { + case KVM_DEV_VFIO_GROUP_ADD: + if (get_user(fd, (int32_t __user *)argp)) + return -EFAULT; + + f = fdget(fd); + if (!f.file) + return -EBADF; + + vfio_group = kvm_vfio_group_get_external_user(f.file); + fdput(f); + + if (IS_ERR(vfio_group)) + return PTR_ERR(vfio_group); + + mutex_lock(&kv->lock); + + list_for_each_entry(kvg, &kv->group_list, node) { + if (kvg->vfio_group == vfio_group) { + mutex_unlock(&kv->lock); + kvm_vfio_group_put_external_user(vfio_group); + return -EEXIST; + } + } + + kvg = kzalloc(sizeof(*kvg), GFP_KERNEL); + if (!kvg) { + mutex_unlock(&kv->lock); + kvm_vfio_group_put_external_user(vfio_group); + return -ENOMEM; + } + + list_add_tail(&kvg->node, &kv->group_list); + kvg->vfio_group = vfio_group; + + mutex_unlock(&kv->lock); + + kvm_vfio_update_coherency(dev); + + return 0; + + case KVM_DEV_VFIO_GROUP_DEL: + if (get_user(fd, (int32_t __user *)argp)) + return -EFAULT; + + f = fdget(fd); + if (!f.file) + return -EBADF; + + vfio_group = kvm_vfio_group_get_external_user(f.file); + fdput(f); + + if (IS_ERR(vfio_group)) + return PTR_ERR(vfio_group); + + ret = -ENOENT; + + mutex_lock(&kv->lock); + + list_for_each_entry(kvg, &kv->group_list, node) { + if (kvg->vfio_group != vfio_group) + continue; + + list_del(&kvg->node); + kvm_vfio_group_put_external_user(kvg->vfio_group); + kfree(kvg); + ret = 0; + break; + } + + mutex_unlock(&kv->lock); + + kvm_vfio_group_put_external_user(vfio_group); + + kvm_vfio_update_coherency(dev); + + return ret; + } + + return -ENXIO; +} + +static int kvm_vfio_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_VFIO_GROUP: + return kvm_vfio_set_group(dev, attr->attr, attr->addr); + } + + return -ENXIO; +} + +static int kvm_vfio_has_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_VFIO_GROUP: + switch (attr->attr) { + case KVM_DEV_VFIO_GROUP_ADD: + case KVM_DEV_VFIO_GROUP_DEL: + return 0; + } + + break; + } + + return -ENXIO; +} + +static void kvm_vfio_destroy(struct kvm_device *dev) +{ + struct kvm_vfio *kv = dev->private; + struct kvm_vfio_group *kvg, *tmp; + + list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) { + kvm_vfio_group_put_external_user(kvg->vfio_group); + list_del(&kvg->node); + kfree(kvg); + } + + kvm_vfio_update_coherency(dev); + + kfree(kv); + kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */ +} + +static int kvm_vfio_create(struct kvm_device *dev, u32 type) +{ + struct kvm_device *tmp; + struct kvm_vfio *kv; + + /* Only one VFIO "device" per VM */ + list_for_each_entry(tmp, &dev->kvm->devices, vm_node) + if (tmp->ops == &kvm_vfio_ops) + return -EBUSY; + + kv = kzalloc(sizeof(*kv), GFP_KERNEL); + if (!kv) + return -ENOMEM; + + INIT_LIST_HEAD(&kv->group_list); + mutex_init(&kv->lock); + + dev->private = kv; + + return 0; +} + +struct kvm_device_ops kvm_vfio_ops = { + .name = "kvm-vfio", + .create = kvm_vfio_create, + .destroy = kvm_vfio_destroy, + .set_attr = kvm_vfio_set_attr, + .has_attr = kvm_vfio_has_attr, +}; |